Ece Koç - 201805007
Emircan Karagöz - 191805019
First Part
Import Packages
In [1]:
import scispacy
import spacy
#Core models
import en_core_sci_sm
import en_core_sci_md
#NER specific models
import en_ner_bc5cdr_md
#Tools for extracting & displaying data
from spacy import displacy
import pandas as pd
Load Data
In [2]:
import pandas as pd
mtsample_df=pd.read_csv("mtsamples.csv")
In [3]:
print(mtsample_df.shape)
(4999, 6)
In [4]:
print(mtsample_df.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4999 entries, 0 to 4998 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 4999 non-null int64 1 description 4999 non-null object 2 medical_specialty 4999 non-null object 3 sample_name 4999 non-null object 4 transcription 4966 non-null object 5 keywords 3931 non-null object dtypes: int64(1), object(5) memory usage: 234.5+ KB None
Test the models with sample data
In [5]:
# Pick specific transcription to use (row 3, column "transcription") and test the scispacy NER model
text = mtsample_df.loc[10, "transcription"]
print(text)
PREOPERATIVE DIAGNOSIS: , Morbid obesity. ,POSTOPERATIVE DIAGNOSIS: , Morbid obesity. ,PROCEDURE:, Laparoscopic Roux-en-Y gastric bypass, antecolic, antegastric with 25-mm EEA anastamosis, esophagogastroduodenoscopy. ,ANESTHESIA: , General with endotracheal intubation. ,INDICATIONS FOR PROCEDURE: , This is a 50-year-old male who has been overweight for many years and has tried multiple different weight loss diets and programs. The patient has now begun to have comorbidities related to the obesity. The patient has attended our bariatric seminar and met with our dietician and psychologist. The patient has read through our comprehensive handout and understands the risks and benefits of bypass surgery as evidenced by the signing of our consent form.,PROCEDURE IN DETAIL: , The risks and benefits were explained to the patient. Consent was obtained. The patient was taken to the operating room and placed supine on the operating room table. General anesthesia was administered with endotracheal intubation. A Foley catheter was placed for bladder decompression. All pressure points were carefully padded, and sequential compression devices were placed on the legs. The abdomen was prepped and draped in standard, sterile, surgical fashion. Marcaine was injected into the umbilicus.
Load specific model: en_core_sci_sm and pass 'text' through
In [6]:
nlp_sm = en_core_sci_sm.load()
doc = nlp_sm(text)
#Display resulting entity extraction
displacy_image = displacy.render(doc, jupyter=True,style='ent')
PREOPERATIVE
ENTITY
DIAGNOSIS
ENTITY
: ,
Morbid obesity
ENTITY
. ,
POSTOPERATIVE DIAGNOSIS
ENTITY
: ,
Morbid obesity
ENTITY
. ,
PROCEDURE
ENTITY
:, Laparoscopic Roux-en-Y gastric bypass,
antecolic
ENTITY
,
antegastric
ENTITY
with 25-mm
EEA
ENTITY
anastamosis
ENTITY
,
esophagogastroduodenoscopy
ENTITY
. ,
ANESTHESIA
ENTITY
: ,
General with
ENTITY
endotracheal intubation
ENTITY
. ,
INDICATIONS FOR PROCEDURE
ENTITY
: , This is a 50-year-old
male
ENTITY
who has been
overweight
ENTITY
for many
years
ENTITY
and has tried multiple different
weight loss diets
ENTITY
and
programs
ENTITY
. The
patient
ENTITY
has now begun to have
comorbidities
ENTITY
related to the
obesity
ENTITY
. The
patient
ENTITY
has attended our
bariatric seminar
ENTITY
and met with our
dietician
ENTITY
and
psychologist
ENTITY
. The
patient
ENTITY
has
read
ENTITY
through our
comprehensive
ENTITY
handout
ENTITY
and understands the
risks
ENTITY
and
benefits
ENTITY
of
bypass surgery
ENTITY
as evidenced by the
signing
ENTITY
of our
consent
ENTITY
form.,PROCEDURE
ENTITY
IN
DETAIL
ENTITY
: , The
risks
ENTITY
and
benefits
ENTITY
were explained to the
patient
ENTITY
.
Consent
ENTITY
was obtained. The
patient
ENTITY
was taken to the
operating room
ENTITY
and placed
supine
ENTITY
on the
operating room
ENTITY
table.
General anesthesia
ENTITY
was
administered
ENTITY
with
endotracheal intubation
ENTITY
. A
Foley catheter
ENTITY
was placed for
bladder
ENTITY
decompression
ENTITY
. All
pressure points
ENTITY
were carefully padded, and
sequential
ENTITY
compression devices
ENTITY
were placed on the
legs
ENTITY
. The
abdomen
ENTITY
was
prepped
ENTITY
and draped in
standard
ENTITY
,
sterile
ENTITY
,
surgical fashion
ENTITY
.
Marcaine
ENTITY
was
injected
ENTITY
into the
umbilicus
ENTITY
.
Load the specific model: en_core_sci_md and pass 'text' through
In [7]:
nlp_md = en_core_sci_md.load()
doc = nlp_md(text)
#Display resulting entity extraction
displacy_image = displacy.render(doc, jupyter=True,style='ent')
PREOPERATIVE
DIAGNOSIS
ENTITY
: ,
Morbid obesity
ENTITY
. ,
POSTOPERATIVE DIAGNOSIS
ENTITY
: ,
Morbid obesity
ENTITY
. ,PROCEDURE:, Laparoscopic Roux-en-Y gastric bypass,
antecolic
ENTITY
,
antegastric
ENTITY
with 25-mm
EEA
ENTITY
anastamosis
ENTITY
,
esophagogastroduodenoscopy
ENTITY
. ,ANESTHESIA: ,
General with
ENTITY
endotracheal intubation
ENTITY
. ,INDICATIONS FOR
PROCEDURE
ENTITY
: , This is a 50-year-old
male
ENTITY
who has been
overweight
ENTITY
for many
years
ENTITY
and has tried multiple different
weight loss diets
ENTITY
and
programs
ENTITY
. The
patient
ENTITY
has now begun to have
comorbidities
ENTITY
related to the
obesity
ENTITY
. The
patient
ENTITY
has
attended
ENTITY
our
bariatric seminar
ENTITY
and met with our
dietician
ENTITY
and
psychologist
ENTITY
. The
patient
ENTITY
has
read
ENTITY
through our
comprehensive
ENTITY
handout
ENTITY
and understands the
risks
ENTITY
and
benefits
ENTITY
of
bypass surgery
ENTITY
as evidenced by the
signing
ENTITY
of our
consent
ENTITY
form.,PROCEDURE IN
DETAIL
ENTITY
: , The
risks
ENTITY
and
benefits
ENTITY
were explained to the
patient
ENTITY
.
Consent
ENTITY
was obtained. The
patient
ENTITY
was taken to the
operating room
ENTITY
and placed
supine
ENTITY
on the
operating room table
ENTITY
.
General anesthesia
ENTITY
was
administered
ENTITY
with
endotracheal intubation
ENTITY
. A
Foley catheter
ENTITY
was placed for
bladder decompression
ENTITY
. All pressure points were carefully
padded
ENTITY
, and
sequential
ENTITY
compression devices
ENTITY
were placed on the
legs
ENTITY
. The
abdomen
ENTITY
was
prepped
ENTITY
and
draped
ENTITY
in
standard
ENTITY
,
sterile
ENTITY
,
surgical fashion
ENTITY
.
Marcaine
ENTITY
was
injected
ENTITY
into the
umbilicus
ENTITY
.
Load specific model: import en_ner_bc5cdr_md and pass 'text' through
In [8]:
nlp_bc = en_ner_bc5cdr_md.load()
doc = nlp_bc(text)
#Display resulting entity extraction
displacy_image = displacy.render(doc, jupyter=True,style='ent')
PREOPERATIVE DIAGNOSIS: ,
Morbid obesity
DISEASE
. ,POSTOPERATIVE DIAGNOSIS: ,
Morbid obesity
DISEASE
. ,PROCEDURE:, Laparoscopic Roux-en-Y gastric bypass, antecolic, antegastric with 25-mm EEA anastamosis, esophagogastroduodenoscopy. ,ANESTHESIA: , General with endotracheal intubation. ,INDICATIONS FOR PROCEDURE: , This is a 50-year-old male who has been overweight for many years and has tried multiple different
weight loss
DISEASE
diets and programs. The patient has now begun to have comorbidities related to the obesity. The patient has attended our bariatric seminar and met with our dietician and psychologist. The patient has read through our comprehensive handout and understands the risks and benefits of bypass surgery as evidenced by the signing of our consent form.,PROCEDURE IN DETAIL: , The risks and benefits were explained to the patient. Consent was obtained. The patient was taken to the operating room and placed supine on the operating room table. General anesthesia was administered with endotracheal intubation. A Foley catheter was placed for bladder decompression. All pressure points were carefully padded, and sequential compression devices were placed on the legs. The abdomen was prepped and draped in standard, sterile, surgical fashion.
Marcaine
CHEMICAL
was injected into the umbilicus.
Display the entity
In [9]:
print("TEXT", "START", "END", "ENTITY TYPE")
for ent in doc.ents:
print(ent.text, ent.start_char, ent.end_char, ent.label_)
TEXT START END ENTITY TYPE Morbid obesity 26 40 DISEASE Morbid obesity 70 84 DISEASE weight loss 400 411 DISEASE Marcaine 1256 1264 CHEMICAL
In [10]:
mtsample_df.dropna(subset=['transcription'], inplace=True)
mtsample_df_subset = mtsample_df.sample(n=100, random_state=42)
mtsample_df_subset.info()
mtsample_df_subset.head()
<class 'pandas.core.frame.DataFrame'> Index: 100 entries, 3162 to 3581 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 100 non-null int64 1 description 100 non-null object 2 medical_specialty 100 non-null object 3 sample_name 100 non-null object 4 transcription 100 non-null object 5 keywords 78 non-null object dtypes: int64(1), object(5) memory usage: 5.5+ KB
Out[10]:
| Unnamed: 0 | description | medical_specialty | sample_name | transcription | keywords | |
|---|---|---|---|---|---|---|
| 3162 | 3162 | Markedly elevated PT INR despite stopping Cou... | Hematology - Oncology | Hematology Consult - 1 | HISTORY OF PRESENT ILLNESS:, The patient is w... | NaN |
| 1981 | 1981 | Intercostal block from fourth to tenth interc... | Pain Management | Intercostal block - 1 | PREPROCEDURE DIAGNOSIS:, Chest pain secondary... | pain management, xylocaine, marcaine, intercos... |
| 1361 | 1361 | The patient is a 65-year-old female who under... | SOAP / Chart / Progress Notes | Lobectomy - Followup | HISTORY OF PRESENT ILLNESS: , The patient is a... | soap / chart / progress notes, non-small cell ... |
| 3008 | 3008 | Construction of right upper arm hemodialysis ... | Nephrology | Hemodialysis Fistula Construction | PREOPERATIVE DIAGNOSIS: , End-stage renal dise... | nephrology, end-stage renal disease, av dialys... |
| 4943 | 4943 | Bronchoscopy with brush biopsies. Persistent... | Cardiovascular / Pulmonary | Bronchoscopy - 8 | PREOPERATIVE DIAGNOSIS: , Persistent pneumonia... | cardiovascular / pulmonary, persistent pneumon... |
In [11]:
from spacy.matcher import Matcher
In [12]:
pattern = [{'ENT_TYPE':'CHEMICAL'}, {'LIKE_NUM': True}, {'IS_ASCII': True}]
matcher = Matcher(nlp_bc.vocab)
matcher.add("DRUG_DOSE", [pattern])
In [13]:
ner_named_entities = []
ner_category_labels = []
for transcription in mtsample_df_subset['transcription']:
doc = nlp_bc(transcription)
matches = matcher(doc)
for match_id, start, end in matches:
string_id = nlp_bc.vocab.strings[match_id] # get string representation
span = doc[start:end] # the matched span adding drugs doses
print(span.text, start, end, string_id,)
ner_named_entities.append(span.text)
ner_category_labels.append(string_id)
#Add disease and drugs
for ent in doc.ents:
print(ent.text, ent.start_char, ent.end_char, ent.label_)
ner_named_entities.append(ent.text)
ner_category_labels.append(ent.label_)
ner_model_df = pd.DataFrame({'ner_named_entity': ner_named_entities, 'ner_category_label': ner_category_labels})
Xylocaine 20 mL 129 132 DRUG_DOSE Chest pain 26 36 DISEASE Chest pain 122 132 DISEASE intercostal block 318 335 DISEASE chest pain 388 398 DISEASE Xylocaine 730 739 CHEMICAL Marcaine 750 758 CHEMICAL contusion 987 996 DISEASE respiratory distress 1076 1096 DISEASE pain 1150 1154 DISEASE Marcaine 0.25% 133 136 DRUG_DOSE Chest pain 26 36 DISEASE Chest pain 122 132 DISEASE intercostal block 318 335 DISEASE chest pain 388 398 DISEASE Xylocaine 730 739 CHEMICAL Marcaine 750 758 CHEMICAL contusion 987 996 DISEASE respiratory distress 1076 1096 DISEASE pain 1150 1154 DISEASE Aspirin 81 mg 204 207 DRUG_DOSE non-small cell lung cancer 114 140 DISEASE barium 322 328 CHEMICAL hiatal hernia 373 386 DISEASE odynophagia 647 658 DISEASE tenderness 829 839 DISEASE DVT 918 921 DISEASE weight loss 952 963 DISEASE anorexia 965 973 DISEASE fevers 975 981 DISEASE chills 983 989 DISEASE headaches 991 1000 DISEASE aches 1006 1011 DISEASE pains 1015 1020 DISEASE cough 1022 1027 DISEASE hemoptysis 1029 1039 DISEASE shortness of breath 1041 1060 DISEASE dyspnea 1073 1080 DISEASE Aspirin 1109 1116 CHEMICAL Spiriva 1134 1141 CHEMICAL albuterol 1159 1168 CHEMICAL cyanosis 1455 1463 DISEASE clubbing 1465 1473 DISEASE Cranial nerves II 1516 1533 DISEASE non-small cell lung cancer 1657 1683 DISEASE DVT 1853 1856 DISEASE nifedipine 1930 1940 CHEMICAL esophageal spasm 1966 1982 DISEASE Spiriva 10 mcg 212 215 DRUG_DOSE non-small cell lung cancer 114 140 DISEASE barium 322 328 CHEMICAL hiatal hernia 373 386 DISEASE odynophagia 647 658 DISEASE tenderness 829 839 DISEASE DVT 918 921 DISEASE weight loss 952 963 DISEASE anorexia 965 973 DISEASE fevers 975 981 DISEASE chills 983 989 DISEASE headaches 991 1000 DISEASE aches 1006 1011 DISEASE pains 1015 1020 DISEASE cough 1022 1027 DISEASE hemoptysis 1029 1039 DISEASE shortness of breath 1041 1060 DISEASE dyspnea 1073 1080 DISEASE Aspirin 1109 1116 CHEMICAL Spiriva 1134 1141 CHEMICAL albuterol 1159 1168 CHEMICAL cyanosis 1455 1463 DISEASE clubbing 1465 1473 DISEASE Cranial nerves II 1516 1533 DISEASE non-small cell lung cancer 1657 1683 DISEASE DVT 1853 1856 DISEASE nifedipine 1930 1940 CHEMICAL esophageal spasm 1966 1982 DISEASE nifedipine 10 mg 376 379 DRUG_DOSE non-small cell lung cancer 114 140 DISEASE barium 322 328 CHEMICAL hiatal hernia 373 386 DISEASE odynophagia 647 658 DISEASE tenderness 829 839 DISEASE DVT 918 921 DISEASE weight loss 952 963 DISEASE anorexia 965 973 DISEASE fevers 975 981 DISEASE chills 983 989 DISEASE headaches 991 1000 DISEASE aches 1006 1011 DISEASE pains 1015 1020 DISEASE cough 1022 1027 DISEASE hemoptysis 1029 1039 DISEASE shortness of breath 1041 1060 DISEASE dyspnea 1073 1080 DISEASE Aspirin 1109 1116 CHEMICAL Spiriva 1134 1141 CHEMICAL albuterol 1159 1168 CHEMICAL cyanosis 1455 1463 DISEASE clubbing 1465 1473 DISEASE Cranial nerves II 1516 1533 DISEASE non-small cell lung cancer 1657 1683 DISEASE DVT 1853 1856 DISEASE nifedipine 1930 1940 CHEMICAL esophageal spasm 1966 1982 DISEASE aspirin one tablet 220 223 DRUG_DOSE thrombocytopenia 160 176 DISEASE atrial fibrillation 696 715 DISEASE fever 884 889 DISEASE chills 891 897 DISEASE sweats 908 914 DISEASE lymphadenopathy 920 935 DISEASE nausea 941 947 DISEASE vomiting 951 959 DISEASE Multivitamin q.d 1023 1039 CHEMICAL aspirin 1042 1049 CHEMICAL q.d 1061 1064 CHEMICAL Lupron q. 1067 1076 CHEMICAL Warfarin 1116 1124 CHEMICAL negative.,PAST 1222 1236 DISEASE Prostate cancer 1309 1324 DISEASE prostate 1565 1573 DISEASE q. three months 227 230 DRUG_DOSE thrombocytopenia 160 176 DISEASE atrial fibrillation 696 715 DISEASE fever 884 889 DISEASE chills 891 897 DISEASE sweats 908 914 DISEASE lymphadenopathy 920 935 DISEASE nausea 941 947 DISEASE vomiting 951 959 DISEASE Multivitamin q.d 1023 1039 CHEMICAL aspirin 1042 1049 CHEMICAL q.d 1061 1064 CHEMICAL Lupron q. 1067 1076 CHEMICAL Warfarin 1116 1124 CHEMICAL negative.,PAST 1222 1236 DISEASE Prostate cancer 1309 1324 DISEASE prostate 1565 1573 DISEASE Warfarin 2.5 mg 239 242 DRUG_DOSE thrombocytopenia 160 176 DISEASE atrial fibrillation 696 715 DISEASE fever 884 889 DISEASE chills 891 897 DISEASE sweats 908 914 DISEASE lymphadenopathy 920 935 DISEASE nausea 941 947 DISEASE vomiting 951 959 DISEASE Multivitamin q.d 1023 1039 CHEMICAL aspirin 1042 1049 CHEMICAL q.d 1061 1064 CHEMICAL Lupron q. 1067 1076 CHEMICAL Warfarin 1116 1124 CHEMICAL negative.,PAST 1222 1236 DISEASE Prostate cancer 1309 1324 DISEASE prostate 1565 1573 DISEASE Topamax 100 mg 57 60 DRUG_DOSE hypertension 45 57 DISEASE breath.,PAST 75 87 DISEASE alcohol 219 226 CHEMICAL obesity 293 300 DISEASE Topamax 342 349 CHEMICAL Zoloft 370 376 CHEMICAL Abilify 397 404 CHEMICAL Motrin 417 423 CHEMICAL edema 869 874 DISEASE clubbing 876 884 DISEASE Zoloft 100 mg 63 66 DRUG_DOSE hypertension 45 57 DISEASE breath.,PAST 75 87 DISEASE alcohol 219 226 CHEMICAL obesity 293 300 DISEASE Topamax 342 349 CHEMICAL Zoloft 370 376 CHEMICAL Abilify 397 404 CHEMICAL Motrin 417 423 CHEMICAL edema 869 874 DISEASE clubbing 876 884 DISEASE Abilify 5 mg 69 72 DRUG_DOSE hypertension 45 57 DISEASE breath.,PAST 75 87 DISEASE alcohol 219 226 CHEMICAL obesity 293 300 DISEASE Topamax 342 349 CHEMICAL Zoloft 370 376 CHEMICAL Abilify 397 404 CHEMICAL Motrin 417 423 CHEMICAL edema 869 874 DISEASE clubbing 876 884 DISEASE Motrin 800 mg 74 77 DRUG_DOSE hypertension 45 57 DISEASE breath.,PAST 75 87 DISEASE alcohol 219 226 CHEMICAL obesity 293 300 DISEASE Topamax 342 349 CHEMICAL Zoloft 370 376 CHEMICAL Abilify 397 404 CHEMICAL Motrin 417 423 CHEMICAL edema 869 874 DISEASE clubbing 876 884 DISEASE Xanax 1 mg 76 79 DRUG_DOSE Chronic obstructive pulmonary disease.,2 29 69 DISEASE Diabetes 114 122 DISEASE chronic obstructive pulmonary disease.,2 247 287 DISEASE Diabetes 289 297 DISEASE Albuterol 553 562 CHEMICAL Xanax 581 586 CHEMICAL Cardizem 602 610 CHEMICAL Colace 631 637 CHEMICAL NPH 10 units subcutaneous b.i.d.,7 685 719 DISEASE Atrovent 721 729 CHEMICAL Statin 748 754 CHEMICAL Paxil 803 808 CHEMICAL Prednisone 826 836 CHEMICAL Darvocet 854 862 CHEMICAL Nexium 960 966 CHEMICAL complaints of shortness of breath 1047 1080 DISEASE COPD 1109 1113 DISEASE AMA 1186 1189 CHEMICAL COPD 1303 1307 DISEASE bronchitis 1366 1376 DISEASE diverticular disease 1608 1628 DISEASE hemorrhage 1690 1700 DISEASE Plavix 1702 1708 CHEMICAL Lovenox 1745 1752 CHEMICAL bleeding 1787 1795 DISEASE Colace 100 mg 87 90 DRUG_DOSE Chronic obstructive pulmonary disease.,2 29 69 DISEASE Diabetes 114 122 DISEASE chronic obstructive pulmonary disease.,2 247 287 DISEASE Diabetes 289 297 DISEASE Albuterol 553 562 CHEMICAL Xanax 581 586 CHEMICAL Cardizem 602 610 CHEMICAL Colace 631 637 CHEMICAL NPH 10 units subcutaneous b.i.d.,7 685 719 DISEASE Atrovent 721 729 CHEMICAL Statin 748 754 CHEMICAL Paxil 803 808 CHEMICAL Prednisone 826 836 CHEMICAL Darvocet 854 862 CHEMICAL Nexium 960 966 CHEMICAL complaints of shortness of breath 1047 1080 DISEASE COPD 1109 1113 DISEASE AMA 1186 1189 CHEMICAL COPD 1303 1307 DISEASE bronchitis 1366 1376 DISEASE diverticular disease 1608 1628 DISEASE hemorrhage 1690 1700 DISEASE Plavix 1702 1708 CHEMICAL Lovenox 1745 1752 CHEMICAL bleeding 1787 1795 DISEASE Paxil 10 mg 120 123 DRUG_DOSE Chronic obstructive pulmonary disease.,2 29 69 DISEASE Diabetes 114 122 DISEASE chronic obstructive pulmonary disease.,2 247 287 DISEASE Diabetes 289 297 DISEASE Albuterol 553 562 CHEMICAL Xanax 581 586 CHEMICAL Cardizem 602 610 CHEMICAL Colace 631 637 CHEMICAL NPH 10 units subcutaneous b.i.d.,7 685 719 DISEASE Atrovent 721 729 CHEMICAL Statin 748 754 CHEMICAL Paxil 803 808 CHEMICAL Prednisone 826 836 CHEMICAL Darvocet 854 862 CHEMICAL Nexium 960 966 CHEMICAL complaints of shortness of breath 1047 1080 DISEASE COPD 1109 1113 DISEASE AMA 1186 1189 CHEMICAL COPD 1303 1307 DISEASE bronchitis 1366 1376 DISEASE diverticular disease 1608 1628 DISEASE hemorrhage 1690 1700 DISEASE Plavix 1702 1708 CHEMICAL Lovenox 1745 1752 CHEMICAL bleeding 1787 1795 DISEASE Prednisone 20 mg 125 128 DRUG_DOSE Chronic obstructive pulmonary disease.,2 29 69 DISEASE Diabetes 114 122 DISEASE chronic obstructive pulmonary disease.,2 247 287 DISEASE Diabetes 289 297 DISEASE Albuterol 553 562 CHEMICAL Xanax 581 586 CHEMICAL Cardizem 602 610 CHEMICAL Colace 631 637 CHEMICAL NPH 10 units subcutaneous b.i.d.,7 685 719 DISEASE Atrovent 721 729 CHEMICAL Statin 748 754 CHEMICAL Paxil 803 808 CHEMICAL Prednisone 826 836 CHEMICAL Darvocet 854 862 CHEMICAL Nexium 960 966 CHEMICAL complaints of shortness of breath 1047 1080 DISEASE COPD 1109 1113 DISEASE AMA 1186 1189 CHEMICAL COPD 1303 1307 DISEASE bronchitis 1366 1376 DISEASE diverticular disease 1608 1628 DISEASE hemorrhage 1690 1700 DISEASE Plavix 1702 1708 CHEMICAL Lovenox 1745 1752 CHEMICAL bleeding 1787 1795 DISEASE Nexium 40 mg 149 152 DRUG_DOSE Chronic obstructive pulmonary disease.,2 29 69 DISEASE Diabetes 114 122 DISEASE chronic obstructive pulmonary disease.,2 247 287 DISEASE Diabetes 289 297 DISEASE Albuterol 553 562 CHEMICAL Xanax 581 586 CHEMICAL Cardizem 602 610 CHEMICAL Colace 631 637 CHEMICAL NPH 10 units subcutaneous b.i.d.,7 685 719 DISEASE Atrovent 721 729 CHEMICAL Statin 748 754 CHEMICAL Paxil 803 808 CHEMICAL Prednisone 826 836 CHEMICAL Darvocet 854 862 CHEMICAL Nexium 960 966 CHEMICAL complaints of shortness of breath 1047 1080 DISEASE COPD 1109 1113 DISEASE AMA 1186 1189 CHEMICAL COPD 1303 1307 DISEASE bronchitis 1366 1376 DISEASE diverticular disease 1608 1628 DISEASE hemorrhage 1690 1700 DISEASE Plavix 1702 1708 CHEMICAL Lovenox 1745 1752 CHEMICAL bleeding 1787 1795 DISEASE Naprosyn one p.o 1109 1112 DRUG_DOSE Hallux interphalangeus 59 81 DISEASE Hallux interphalangeus 155 177 DISEASE foot.,PROCEDURES 185 201 CHEMICAL painful bunion 539 553 DISEASE painful bunion 627 641 DISEASE Marcaine 1535 1543 CHEMICAL Lidocaine 1557 1566 CHEMICAL interphalangeus deformity 3925 3950 DISEASE dexamethasone phosphate 5166 5189 CHEMICAL Vicodin 5868 5875 CHEMICAL pain 5907 5911 DISEASE Naprosyn 5916 5924 CHEMICAL Lidocaine 1% 260 263 DRUG_DOSE basal cell nevus syndrome 676 701 DISEASE throat 1325 1331 DISEASE Lidocaine 1450 1459 CHEMICAL epinephrine 1468 1479 CHEMICAL 1:100,000 1497 1506 CHEMICAL tooth 2206 2211 DISEASE Tooth 2235 2240 DISEASE clindamycin 3685 3696 CHEMICAL throat 3921 3927 DISEASE blood loss 4068 4078 DISEASE lidocaine 2% 221 224 DRUG_DOSE Mobitz type II block 64 84 DISEASE AV dissociation 90 105 DISEASE Mobitz type II block 146 166 DISEASE Benadryl 974 982 CHEMICAL lidocaine 1370 1379 CHEMICAL asystole 1541 1549 DISEASE standby.,After 1789 1803 CHEMICAL bleeding 3632 3640 DISEASE asystole 3838 3846 DISEASE Creatinine 1.3, 91 94 DRUG_DOSE pain 200 204 DISEASE erectile dysfunction 261 281 DISEASE Creatinine 496 506 CHEMICAL sodium 512 518 CHEMICAL potassium 528 537 CHEMICAL Calcium 544 551 CHEMICAL Marcaine 1037 1045 CHEMICAL sodium 141, 94 97 DRUG_DOSE pain 200 204 DISEASE erectile dysfunction 261 281 DISEASE Creatinine 496 506 CHEMICAL sodium 512 518 CHEMICAL potassium 528 537 CHEMICAL Calcium 544 551 CHEMICAL Marcaine 1037 1045 CHEMICAL potassium 4.0. 98 101 DRUG_DOSE pain 200 204 DISEASE erectile dysfunction 261 281 DISEASE Creatinine 496 506 CHEMICAL sodium 512 518 CHEMICAL potassium 528 537 CHEMICAL Calcium 544 551 CHEMICAL Marcaine 1037 1045 CHEMICAL Calcium 8.6. 102 105 DRUG_DOSE pain 200 204 DISEASE erectile dysfunction 261 281 DISEASE Creatinine 496 506 CHEMICAL sodium 512 518 CHEMICAL potassium 528 537 CHEMICAL Calcium 544 551 CHEMICAL Marcaine 1037 1045 CHEMICAL 7-hole 2.3 titanium 607 610 DRUG_DOSE fractures 317 326 DISEASE titanium 566 574 CHEMICAL fractures 994 1003 DISEASE loss of consciousness 1017 1038 DISEASE mall occlusion 1114 1128 DISEASE sore 1133 1137 DISEASE fractures 1264 1273 DISEASE fracture 1365 1373 DISEASE fractures 1525 1534 DISEASE ASA 1803 1806 CHEMICAL lidocaine 2150 2159 CHEMICAL 1:100,000 epinephrine 2165 2186 CHEMICAL nerve block 2225 2236 DISEASE malocclusion 2626 2638 DISEASE bruxism 2651 2658 DISEASE lidocaine 2831 2840 CHEMICAL epinephrine 2846 2857 CHEMICAL fracture 3304 3312 DISEASE fracture 3424 3432 DISEASE 7-hole 3469 3475 CHEMICAL titanium 3480 3488 CHEMICAL lidocaine 3681 3690 CHEMICAL 1:100,000 epinephrine 3696 3717 CHEMICAL fracture 3896 3904 DISEASE fracture 3944 3952 DISEASE fracture 4129 4137 DISEASE fracture 4243 4251 DISEASE throat 4760 4766 DISEASE aspirin 81 mg 434 437 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Klonopin 0.5 mg 448 451 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE digoxin 0.125 mg 456 459 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Lexapro 10 mg 463 466 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE TriCor 145 mg 470 473 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Lasix 20 mg 478 481 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Lopressor 75 mg 504 507 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE chloride 20 mEq 539 542 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Zofran 4 mg 557 560 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Lexapro 10 mg 944 947 DRUG_DOSE pain 106 110 DISEASE fracture 127 135 DISEASE pain 351 355 DISEASE fracture 391 399 DISEASE pain 485 489 DISEASE pain 533 537 DISEASE pain 582 586 DISEASE pain 697 701 DISEASE meningioma 799 809 DISEASE depressive symptoms 867 886 DISEASE depressed 1300 1309 DISEASE fracture 1403 1411 DISEASE ideations 1494 1503 DISEASE ideations 1518 1527 DISEASE manic 1577 1582 DISEASE impulsivity 1652 1663 DISEASE hallucinations 1693 1707 DISEASE delusions 1723 1732 DISEASE flashbacks 1800 1810 DISEASE anxiety 1834 1841 DISEASE restless 1866 1874 DISEASE Hypertension 2007 2019 DISEASE cataracts 2021 2030 DISEASE osteoporosis 2050 2062 DISEASE fracture 2112 2120 DISEASE diabetes 2142 2150 DISEASE seizures 2185 2193 DISEASE strokes 2195 2202 DISEASE aspirin 2271 2278 CHEMICAL Klonopin 2323 2331 CHEMICAL digoxin 2352 2359 CHEMICAL Lexapro 2381 2388 CHEMICAL TriCor 2407 2413 CHEMICAL Lasix 2440 2445 CHEMICAL lidocaine 2487 2496 CHEMICAL Zestril 2504 2511 CHEMICAL Lopressor 2540 2549 CHEMICAL potassium chloride 2677 2695 CHEMICAL p.o 2703 2706 CHEMICAL p.o 2733 2736 CHEMICAL Zofran 2752 2758 CHEMICAL psychiatric 2940 2951 DISEASE psychiatric 3002 3013 DISEASE VP 3178 3180 CHEMICAL depressed 3827 3836 DISEASE delusions 3956 3965 DISEASE ideations 3989 3998 DISEASE hallucinations 4035 4049 DISEASE fracture 4348 4356 DISEASE thoracic compression fractures 4365 4395 DISEASE depression 4473 4483 DISEASE depression disorder.,Axis II 4551 4579 DISEASE Osteoporosis 4603 4615 DISEASE hypertension 4617 4629 DISEASE fracture 4635 4643 DISEASE diabetes 4654 4662 DISEASE meningioma 4664 4674 DISEASE Lexapro 4823 4830 CHEMICAL TCA 4905 4908 CHEMICAL depressive 4926 4936 DISEASE inpatient psychiatric care.,Thank 4973 5006 DISEASE Premarin 1.25 mg 312 315 DRUG_DOSE pain 345 349 DISEASE nerve block 421 432 DISEASE pain 472 476 DISEASE pain 706 710 DISEASE pain 777 781 DISEASE pain 882 886 DISEASE Premarin 1574 1582 CHEMICAL Elmiron 1598 1605 CHEMICAL Elavil 1621 1627 CHEMICAL OxyContin 1646 1655 CHEMICAL Toprol 1695 1701 CHEMICAL Compazine 1731 1740 CHEMICAL morphine 2846 2854 CHEMICAL pain 2957 2961 DISEASE lidocaine 3068 3077 CHEMICAL DMSO 3166 3170 CHEMICAL Kenalog 3172 3179 CHEMICAL heparin 3181 3188 CHEMICAL sodium bicarbonate 3194 3212 CHEMICAL Elmiron 100 mg 317 320 DRUG_DOSE pain 345 349 DISEASE nerve block 421 432 DISEASE pain 472 476 DISEASE pain 706 710 DISEASE pain 777 781 DISEASE pain 882 886 DISEASE Premarin 1574 1582 CHEMICAL Elmiron 1598 1605 CHEMICAL Elavil 1621 1627 CHEMICAL OxyContin 1646 1655 CHEMICAL Toprol 1695 1701 CHEMICAL Compazine 1731 1740 CHEMICAL morphine 2846 2854 CHEMICAL pain 2957 2961 DISEASE lidocaine 3068 3077 CHEMICAL DMSO 3166 3170 CHEMICAL Kenalog 3172 3179 CHEMICAL heparin 3181 3188 CHEMICAL sodium bicarbonate 3194 3212 CHEMICAL Elavil 50 mg 323 326 DRUG_DOSE pain 345 349 DISEASE nerve block 421 432 DISEASE pain 472 476 DISEASE pain 706 710 DISEASE pain 777 781 DISEASE pain 882 886 DISEASE Premarin 1574 1582 CHEMICAL Elmiron 1598 1605 CHEMICAL Elavil 1621 1627 CHEMICAL OxyContin 1646 1655 CHEMICAL Toprol 1695 1701 CHEMICAL Compazine 1731 1740 CHEMICAL morphine 2846 2854 CHEMICAL pain 2957 2961 DISEASE lidocaine 3068 3077 CHEMICAL DMSO 3166 3170 CHEMICAL Kenalog 3172 3179 CHEMICAL heparin 3181 3188 CHEMICAL sodium bicarbonate 3194 3212 CHEMICAL OxyContin 10 mg 329 332 DRUG_DOSE pain 345 349 DISEASE nerve block 421 432 DISEASE pain 472 476 DISEASE pain 706 710 DISEASE pain 777 781 DISEASE pain 882 886 DISEASE Premarin 1574 1582 CHEMICAL Elmiron 1598 1605 CHEMICAL Elavil 1621 1627 CHEMICAL OxyContin 1646 1655 CHEMICAL Toprol 1695 1701 CHEMICAL Compazine 1731 1740 CHEMICAL morphine 2846 2854 CHEMICAL pain 2957 2961 DISEASE lidocaine 3068 3077 CHEMICAL DMSO 3166 3170 CHEMICAL Kenalog 3172 3179 CHEMICAL heparin 3181 3188 CHEMICAL sodium bicarbonate 3194 3212 CHEMICAL Lidocaine 1.5% 66 69 DRUG_DOSE INT 196 199 CHEMICAL Lidocaine 376 385 CHEMICAL Marcaine 869 877 CHEMICAL Depo-Medrol 892 903 CHEMICAL Dilantin 300 mg 205 208 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Haloperidol 1 mg 211 214 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Dexamethasone 4 mg 217 220 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Docusate 100 mg 226 229 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Ibuprofen 600 mg 238 241 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Zantac 150 mg 244 247 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Temodar 100 mg 252 255 DRUG_DOSE glioblastoma multiforme 179 202 DISEASE Temodar 100-mg 564 578 CHEMICAL confusion 623 632 DISEASE diagnosis.,SOCIAL 927 944 CHEMICAL Dilantin 1033 1041 CHEMICAL Haloperidol 1060 1071 CHEMICAL Dexamethasone 1086 1099 CHEMICAL Docusate 1117 1125 CHEMICAL Ibuprofen 1169 1178 CHEMICAL Zantac 1197 1203 CHEMICAL Temodar 1228 1235 CHEMICAL Mouthwash 1260 1269 CHEMICAL Tylenol 1282 1289 CHEMICAL Glioblastoma multiforme 1657 1680 DISEASE cancer 1714 1720 DISEASE tumor 1949 1954 DISEASE Temodar 2080 2087 CHEMICAL Klonopin 1 mg 327 330 DRUG_DOSE panic 331 336 DISEASE sore 799 803 DISEASE grave disability 938 954 DISEASE bipolar disorder 977 993 DISEASE mania 999 1004 DISEASE depression 1009 1019 DISEASE anxiety 1025 1032 DISEASE panic 1037 1042 DISEASE migraine headaches 1324 1342 DISEASE allergy 1387 1394 DISEASE Haldol 1399 1405 CHEMICAL Klonopin 1693 1701 CHEMICAL lithium 1742 1749 CHEMICAL Depakote 1768 1776 CHEMICAL Seroquel 1796 1804 CHEMICAL benzodiazepines 2037 2052 CHEMICAL cocaine 2054 2061 CHEMICAL primary psychotic symptoms 2202 2228 DISEASE agitation 2235 2244 DISEASE psychomotor hyperactivity 2250 2275 DISEASE psychiatric 2337 2348 DISEASE bipolar disorder 2521 2537 DISEASE anxiety 2539 2546 DISEASE polysubstance abuse 2548 2567 DISEASE ADHD 2585 2589 DISEASE polysubstance abuse 2662 2681 DISEASE depression 2793 2803 DISEASE anxiety 2809 2816 DISEASE panic 2821 2826 DISEASE Polysubstance abuse 2840 2859 DISEASE benzodiazepines 2861 2876 CHEMICAL migraine headaches 3009 3027 DISEASE stepfather 3512 3522 CHEMICAL lithium 450 mg 336 339 DRUG_DOSE panic 331 336 DISEASE sore 799 803 DISEASE grave disability 938 954 DISEASE bipolar disorder 977 993 DISEASE mania 999 1004 DISEASE depression 1009 1019 DISEASE anxiety 1025 1032 DISEASE panic 1037 1042 DISEASE migraine headaches 1324 1342 DISEASE allergy 1387 1394 DISEASE Haldol 1399 1405 CHEMICAL Klonopin 1693 1701 CHEMICAL lithium 1742 1749 CHEMICAL Depakote 1768 1776 CHEMICAL Seroquel 1796 1804 CHEMICAL benzodiazepines 2037 2052 CHEMICAL cocaine 2054 2061 CHEMICAL primary psychotic symptoms 2202 2228 DISEASE agitation 2235 2244 DISEASE psychomotor hyperactivity 2250 2275 DISEASE psychiatric 2337 2348 DISEASE bipolar disorder 2521 2537 DISEASE anxiety 2539 2546 DISEASE polysubstance abuse 2548 2567 DISEASE ADHD 2585 2589 DISEASE polysubstance abuse 2662 2681 DISEASE depression 2793 2803 DISEASE anxiety 2809 2816 DISEASE panic 2821 2826 DISEASE Polysubstance abuse 2840 2859 DISEASE benzodiazepines 2861 2876 CHEMICAL migraine headaches 3009 3027 DISEASE stepfather 3512 3522 CHEMICAL Depakote 1000 mg 342 345 DRUG_DOSE panic 331 336 DISEASE sore 799 803 DISEASE grave disability 938 954 DISEASE bipolar disorder 977 993 DISEASE mania 999 1004 DISEASE depression 1009 1019 DISEASE anxiety 1025 1032 DISEASE panic 1037 1042 DISEASE migraine headaches 1324 1342 DISEASE allergy 1387 1394 DISEASE Haldol 1399 1405 CHEMICAL Klonopin 1693 1701 CHEMICAL lithium 1742 1749 CHEMICAL Depakote 1768 1776 CHEMICAL Seroquel 1796 1804 CHEMICAL benzodiazepines 2037 2052 CHEMICAL cocaine 2054 2061 CHEMICAL primary psychotic symptoms 2202 2228 DISEASE agitation 2235 2244 DISEASE psychomotor hyperactivity 2250 2275 DISEASE psychiatric 2337 2348 DISEASE bipolar disorder 2521 2537 DISEASE anxiety 2539 2546 DISEASE polysubstance abuse 2548 2567 DISEASE ADHD 2585 2589 DISEASE polysubstance abuse 2662 2681 DISEASE depression 2793 2803 DISEASE anxiety 2809 2816 DISEASE panic 2821 2826 DISEASE Polysubstance abuse 2840 2859 DISEASE benzodiazepines 2861 2876 CHEMICAL migraine headaches 3009 3027 DISEASE stepfather 3512 3522 CHEMICAL Seroquel 1000 mg 348 351 DRUG_DOSE panic 331 336 DISEASE sore 799 803 DISEASE grave disability 938 954 DISEASE bipolar disorder 977 993 DISEASE mania 999 1004 DISEASE depression 1009 1019 DISEASE anxiety 1025 1032 DISEASE panic 1037 1042 DISEASE migraine headaches 1324 1342 DISEASE allergy 1387 1394 DISEASE Haldol 1399 1405 CHEMICAL Klonopin 1693 1701 CHEMICAL lithium 1742 1749 CHEMICAL Depakote 1768 1776 CHEMICAL Seroquel 1796 1804 CHEMICAL benzodiazepines 2037 2052 CHEMICAL cocaine 2054 2061 CHEMICAL primary psychotic symptoms 2202 2228 DISEASE agitation 2235 2244 DISEASE psychomotor hyperactivity 2250 2275 DISEASE psychiatric 2337 2348 DISEASE bipolar disorder 2521 2537 DISEASE anxiety 2539 2546 DISEASE polysubstance abuse 2548 2567 DISEASE ADHD 2585 2589 DISEASE polysubstance abuse 2662 2681 DISEASE depression 2793 2803 DISEASE anxiety 2809 2816 DISEASE panic 2821 2826 DISEASE Polysubstance abuse 2840 2859 DISEASE benzodiazepines 2861 2876 CHEMICAL migraine headaches 3009 3027 DISEASE stepfather 3512 3522 CHEMICAL omeprazole 20 mg 1097 1100 DRUG_DOSE pain 105 109 DISEASE pain 154 158 DISEASE earache 333 340 DISEASE earache 458 465 DISEASE trauma 548 554 DISEASE hypertension 1385 1397 DISEASE heartburn 1415 1424 DISEASE otitis 1810 1816 DISEASE respiratory infections 1843 1865 DISEASE amoxicillin 1900 1911 CHEMICAL otitis 1921 1927 DISEASE Pepcid 2113 2119 CHEMICAL heartburn 2146 2155 DISEASE Pepcid 2202 2208 CHEMICAL sore throats 2266 2278 DISEASE throat 2320 2326 DISEASE tenderness 3713 3723 DISEASE heartburn 3740 3749 DISEASE hepatosplenomegaly 3840 3858 DISEASE tenderness 3862 3872 DISEASE CVA 3878 3881 CHEMICAL barium 4224 4230 CHEMICAL peptic ulcer disease/gastritis 4429 4459 DISEASE gastro esophageal reflux disease 4465 4497 DISEASE GERD 4499 4503 DISEASE carbonated beverage syndrome 4508 4536 DISEASE otitis 4582 4588 DISEASE penicillin 4604 4614 CHEMICAL otitis 4717 4723 DISEASE peptic ulcer disease 4979 4999 DISEASE pylori 5015 5021 DISEASE pain 5041 5045 DISEASE Trauma 5080 5086 DISEASE Trauma 5089 5095 DISEASE heartburn 5398 5407 DISEASE GERD 5506 5510 DISEASE edema 5626 5631 DISEASE inflammation 5633 5645 DISEASE omeprazole 5714 5724 CHEMICAL heartburn 5803 5812 DISEASE antacid 5903 5910 CHEMICAL Xylocaine 1% 182 185 DRUG_DOSE Masticatory dysfunction.,POSTOPERATIVE 113 151 DISEASE Xylocaine 1077 1086 CHEMICAL 1:100,000 epinephrine 1091 1112 CHEMICAL palate 1591 1597 DISEASE titanium 2178 2186 CHEMICAL platelet-rich 2418 2431 CHEMICAL Xylocaine 2841 2850 CHEMICAL 1:100,000 epinephrine 2852 2873 CHEMICAL titanium 4058 4066 CHEMICAL blood loss 4524 4534 DISEASE blood loss 4588 4598 DISEASE blood loss 4645 4655 DISEASE epinephrine 7 ml 187 190 DRUG_DOSE Masticatory dysfunction.,POSTOPERATIVE 113 151 DISEASE Xylocaine 1077 1086 CHEMICAL 1:100,000 epinephrine 1091 1112 CHEMICAL palate 1591 1597 DISEASE titanium 2178 2186 CHEMICAL platelet-rich 2418 2431 CHEMICAL Xylocaine 2841 2850 CHEMICAL 1:100,000 epinephrine 2852 2873 CHEMICAL titanium 4058 4066 CHEMICAL blood loss 4524 4534 DISEASE blood loss 4588 4598 DISEASE blood loss 4645 4655 DISEASE Naprosyn 375 mg 2356 2359 DRUG_DOSE Hallux abductovalgus 28 48 DISEASE bunionette 141 151 DISEASE Degenerative joint disease 169 195 DISEASE Rheumatoid 240 250 DISEASE Hallux abductovalgus 346 366 DISEASE bunionette 459 469 DISEASE Degenerative joint disease 487 513 DISEASE Rheumatoid 558 568 DISEASE hemi implant 698 710 DISEASE rheumatoid arthritis 1290 1310 DISEASE pain 1321 1325 DISEASE pain 1345 1349 DISEASE Marcaine 2268 2276 CHEMICAL lidocaine 2290 2299 CHEMICAL hypertrophic synovium 3961 3982 DISEASE hypertrophic exostosis 4058 4080 DISEASE erosion 4522 4529 DISEASE hypertrophic synovium 4631 4652 DISEASE exostosis 4689 4698 DISEASE lateral exostosis 9734 9751 DISEASE Tylenol 12434 12441 CHEMICAL p.o 12461 12464 CHEMICAL pain 12481 12485 DISEASE Naprosyn 12490 12498 CHEMICAL p.o 12506 12509 CHEMICAL rheumatoid arthritis 12547 12567 DISEASE Wellbutrin 300 mg 141 144 DRUG_DOSE prostate 91 99 DISEASE plasmacytoma 296 308 DISEASE chest pain 355 365 DISEASE Wellbutrin 761 771 CHEMICAL Xanax 786 791 CHEMICAL omeprazole 835 845 CHEMICAL fatigue 1512 1519 DISEASE chest pain 1562 1572 DISEASE chest pain 1984 1994 DISEASE chest pain 2093 2103 DISEASE Xanax 0.25 mg 146 149 DRUG_DOSE prostate 91 99 DISEASE plasmacytoma 296 308 DISEASE chest pain 355 365 DISEASE Wellbutrin 761 771 CHEMICAL Xanax 786 791 CHEMICAL omeprazole 835 845 CHEMICAL fatigue 1512 1519 DISEASE chest pain 1562 1572 DISEASE chest pain 1984 1994 DISEASE chest pain 2093 2103 DISEASE omeprazole 20 mg 158 161 DRUG_DOSE prostate 91 99 DISEASE plasmacytoma 296 308 DISEASE chest pain 355 365 DISEASE Wellbutrin 761 771 CHEMICAL Xanax 786 791 CHEMICAL omeprazole 835 845 CHEMICAL fatigue 1512 1519 DISEASE chest pain 1562 1572 DISEASE chest pain 1984 1994 DISEASE chest pain 2093 2103 DISEASE iron 240 mg 324 327 DRUG_DOSE pain 24 28 DISEASE pain 43 47 DISEASE cancer 106 112 DISEASE cancer 289 295 DISEASE pain 573 577 DISEASE pain 632 636 DISEASE pain 660 664 DISEASE numbness 785 793 DISEASE pain 841 845 DISEASE pain 914 918 DISEASE pain 939 943 DISEASE pain 1002 1006 DISEASE right leg pain 1018 1032 DISEASE pain 1056 1060 DISEASE pain 1093 1097 DISEASE cancer 1346 1352 DISEASE Duragesic 1490 1499 CHEMICAL iron 1549 1553 CHEMICAL Paxil 1574 1579 CHEMICAL Percocet 1597 1605 CHEMICAL warfarin 1678 1686 CHEMICAL Neurontin 1766 1775 CHEMICAL pain 2125 2129 DISEASE muscle weakness 2639 2654 DISEASE Sensations 2858 2868 DISEASE pinprick.,Spine 2930 2945 CHEMICAL tenderness 3080 3090 DISEASE sacroiliac joints 3121 3138 DISEASE hip joints 3684 3694 DISEASE intracranial abnormalities 3974 4000 DISEASE sacroiliac joint syndrome 4171 4196 DISEASE pain 4223 4227 DISEASE lung cancer metastasis 4359 4381 DISEASE primarily pain 4416 4430 DISEASE sacroiliac joints 4461 4478 DISEASE pain 4533 4537 DISEASE facet joints 4612 4624 DISEASE warfarin 4804 4812 CHEMICAL pain 5142 5146 DISEASE pain 5204 5208 DISEASE Percocet 5303 5311 CHEMICAL Paxil 5417 5422 CHEMICAL Cymbalta 5455 5463 CHEMICAL Peripheral neuropathy 5753 5774 DISEASE Paxil 20 mg 330 333 DRUG_DOSE pain 24 28 DISEASE pain 43 47 DISEASE cancer 106 112 DISEASE cancer 289 295 DISEASE pain 573 577 DISEASE pain 632 636 DISEASE pain 660 664 DISEASE numbness 785 793 DISEASE pain 841 845 DISEASE pain 914 918 DISEASE pain 939 943 DISEASE pain 1002 1006 DISEASE right leg pain 1018 1032 DISEASE pain 1056 1060 DISEASE pain 1093 1097 DISEASE cancer 1346 1352 DISEASE Duragesic 1490 1499 CHEMICAL iron 1549 1553 CHEMICAL Paxil 1574 1579 CHEMICAL Percocet 1597 1605 CHEMICAL warfarin 1678 1686 CHEMICAL Neurontin 1766 1775 CHEMICAL pain 2125 2129 DISEASE muscle weakness 2639 2654 DISEASE Sensations 2858 2868 DISEASE pinprick.,Spine 2930 2945 CHEMICAL tenderness 3080 3090 DISEASE sacroiliac joints 3121 3138 DISEASE hip joints 3684 3694 DISEASE intracranial abnormalities 3974 4000 DISEASE sacroiliac joint syndrome 4171 4196 DISEASE pain 4223 4227 DISEASE lung cancer metastasis 4359 4381 DISEASE primarily pain 4416 4430 DISEASE sacroiliac joints 4461 4478 DISEASE pain 4533 4537 DISEASE facet joints 4612 4624 DISEASE warfarin 4804 4812 CHEMICAL pain 5142 5146 DISEASE pain 5204 5208 DISEASE Percocet 5303 5311 CHEMICAL Paxil 5417 5422 CHEMICAL Cymbalta 5455 5463 CHEMICAL Peripheral neuropathy 5753 5774 DISEASE warfarin 1 mg 356 359 DRUG_DOSE pain 24 28 DISEASE pain 43 47 DISEASE cancer 106 112 DISEASE cancer 289 295 DISEASE pain 573 577 DISEASE pain 632 636 DISEASE pain 660 664 DISEASE numbness 785 793 DISEASE pain 841 845 DISEASE pain 914 918 DISEASE pain 939 943 DISEASE pain 1002 1006 DISEASE right leg pain 1018 1032 DISEASE pain 1056 1060 DISEASE pain 1093 1097 DISEASE cancer 1346 1352 DISEASE Duragesic 1490 1499 CHEMICAL iron 1549 1553 CHEMICAL Paxil 1574 1579 CHEMICAL Percocet 1597 1605 CHEMICAL warfarin 1678 1686 CHEMICAL Neurontin 1766 1775 CHEMICAL pain 2125 2129 DISEASE muscle weakness 2639 2654 DISEASE Sensations 2858 2868 DISEASE pinprick.,Spine 2930 2945 CHEMICAL tenderness 3080 3090 DISEASE sacroiliac joints 3121 3138 DISEASE hip joints 3684 3694 DISEASE intracranial abnormalities 3974 4000 DISEASE sacroiliac joint syndrome 4171 4196 DISEASE pain 4223 4227 DISEASE lung cancer metastasis 4359 4381 DISEASE primarily pain 4416 4430 DISEASE sacroiliac joints 4461 4478 DISEASE pain 4533 4537 DISEASE facet joints 4612 4624 DISEASE warfarin 4804 4812 CHEMICAL pain 5142 5146 DISEASE pain 5204 5208 DISEASE Percocet 5303 5311 CHEMICAL Paxil 5417 5422 CHEMICAL Cymbalta 5455 5463 CHEMICAL Peripheral neuropathy 5753 5774 DISEASE Neurontin 300 mg 377 380 DRUG_DOSE pain 24 28 DISEASE pain 43 47 DISEASE cancer 106 112 DISEASE cancer 289 295 DISEASE pain 573 577 DISEASE pain 632 636 DISEASE pain 660 664 DISEASE numbness 785 793 DISEASE pain 841 845 DISEASE pain 914 918 DISEASE pain 939 943 DISEASE pain 1002 1006 DISEASE right leg pain 1018 1032 DISEASE pain 1056 1060 DISEASE pain 1093 1097 DISEASE cancer 1346 1352 DISEASE Duragesic 1490 1499 CHEMICAL iron 1549 1553 CHEMICAL Paxil 1574 1579 CHEMICAL Percocet 1597 1605 CHEMICAL warfarin 1678 1686 CHEMICAL Neurontin 1766 1775 CHEMICAL pain 2125 2129 DISEASE muscle weakness 2639 2654 DISEASE Sensations 2858 2868 DISEASE pinprick.,Spine 2930 2945 CHEMICAL tenderness 3080 3090 DISEASE sacroiliac joints 3121 3138 DISEASE hip joints 3684 3694 DISEASE intracranial abnormalities 3974 4000 DISEASE sacroiliac joint syndrome 4171 4196 DISEASE pain 4223 4227 DISEASE lung cancer metastasis 4359 4381 DISEASE primarily pain 4416 4430 DISEASE sacroiliac joints 4461 4478 DISEASE pain 4533 4537 DISEASE facet joints 4612 4624 DISEASE warfarin 4804 4812 CHEMICAL pain 5142 5146 DISEASE pain 5204 5208 DISEASE Percocet 5303 5311 CHEMICAL Paxil 5417 5422 CHEMICAL Cymbalta 5455 5463 CHEMICAL Peripheral neuropathy 5753 5774 DISEASE Imuran 100 mg 855 858 DRUG_DOSE Crohn's disease 39 54 DISEASE Imuran 124 130 CHEMICAL intermittent obstructive symptoms 249 282 DISEASE abdominal pain 433 447 DISEASE vomiting 452 460 DISEASE obstruction 511 522 DISEASE obstruction 677 688 DISEASE bleeding 862 870 DISEASE fever 1076 1081 DISEASE tenderness 1145 1155 DISEASE anastomotic stricture 1253 1274 DISEASE abdominal pain 1381 1395 DISEASE obstruction 1417 1428 DISEASE Crohn's disease 1480 1495 DISEASE hepatitis 1583 1592 DISEASE Tylenol 1674 1681 CHEMICAL migraine-type headaches 1686 1709 DISEASE Imuran 1757 1763 CHEMICAL Tylenol 1802 1809 CHEMICAL Imuran 1887 1893 CHEMICAL heartburn 1932 1941 DISEASE allergies 2319 2328 DISEASE Crohn's disease 2383 2398 DISEASE vitamin B12 2400 2411 CHEMICAL Imuran 2439 2445 CHEMICAL Vicodin 2457 2464 CHEMICAL indurated 2725 2734 DISEASE erythema 2847 2855 DISEASE edema 2878 2883 DISEASE tenderness 2947 2957 DISEASE Crohn's disease 3347 3362 DISEASE Imuran 3405 3411 CHEMICAL bowel obstruction 3439 3456 DISEASE hepatitis 3517 3526 DISEASE Tylenol 3554 3561 CHEMICAL reflux 3606 3612 DISEASE thrombophlebitis 3639 3655 DISEASE recurrent bowel obstruction type symptoms. 3787 3829 DISEASE abdominal pain 4027 4041 DISEASE nausea 4043 4049 DISEASE vomiting 4051 4059 DISEASE obstructions 4168 4180 DISEASE Imuran 4272 4278 CHEMICAL Tylenol 4320 4327 CHEMICAL migraines 4405 4414 DISEASE Tylenol 4427 4434 CHEMICAL aspirin 4460 4467 CHEMICAL migraine headaches 4546 4564 DISEASE liver disease 4597 4610 DISEASE reflux-type 4763 4774 DISEASE Zantac 5009 5015 CHEMICAL Zantac 150 mg 988 991 DRUG_DOSE Crohn's disease 39 54 DISEASE Imuran 124 130 CHEMICAL intermittent obstructive symptoms 249 282 DISEASE abdominal pain 433 447 DISEASE vomiting 452 460 DISEASE obstruction 511 522 DISEASE obstruction 677 688 DISEASE bleeding 862 870 DISEASE fever 1076 1081 DISEASE tenderness 1145 1155 DISEASE anastomotic stricture 1253 1274 DISEASE abdominal pain 1381 1395 DISEASE obstruction 1417 1428 DISEASE Crohn's disease 1480 1495 DISEASE hepatitis 1583 1592 DISEASE Tylenol 1674 1681 CHEMICAL migraine-type headaches 1686 1709 DISEASE Imuran 1757 1763 CHEMICAL Tylenol 1802 1809 CHEMICAL Imuran 1887 1893 CHEMICAL heartburn 1932 1941 DISEASE allergies 2319 2328 DISEASE Crohn's disease 2383 2398 DISEASE vitamin B12 2400 2411 CHEMICAL Imuran 2439 2445 CHEMICAL Vicodin 2457 2464 CHEMICAL indurated 2725 2734 DISEASE erythema 2847 2855 DISEASE edema 2878 2883 DISEASE tenderness 2947 2957 DISEASE Crohn's disease 3347 3362 DISEASE Imuran 3405 3411 CHEMICAL bowel obstruction 3439 3456 DISEASE hepatitis 3517 3526 DISEASE Tylenol 3554 3561 CHEMICAL reflux 3606 3612 DISEASE thrombophlebitis 3639 3655 DISEASE recurrent bowel obstruction type symptoms. 3787 3829 DISEASE abdominal pain 4027 4041 DISEASE nausea 4043 4049 DISEASE vomiting 4051 4059 DISEASE obstructions 4168 4180 DISEASE Imuran 4272 4278 CHEMICAL Tylenol 4320 4327 CHEMICAL migraines 4405 4414 DISEASE Tylenol 4427 4434 CHEMICAL aspirin 4460 4467 CHEMICAL migraine headaches 4546 4564 DISEASE liver disease 4597 4610 DISEASE reflux-type 4763 4774 DISEASE Zantac 5009 5015 CHEMICAL Vicodin 1- 732 735 DRUG_DOSE Hodgkin's lymphoma 871 889 DISEASE Hodgkin's lymphoma 1041 1059 DISEASE pain 4045 4049 DISEASE Vicodin 4065 4072 CHEMICAL pain 4107 4111 DISEASE Keflex 4128 4134 CHEMICAL morphine 1 mg 268 271 DRUG_DOSE spondylosis 35 46 DISEASE spondylosis 172 183 DISEASE Morphine 458 466 CHEMICAL spondylosis 698 709 DISEASE myelopathy 714 724 DISEASE morphine 1486 1494 CHEMICAL Xylocaine 1575 1584 CHEMICAL epinephrine 1589 1600 CHEMICAL pins 1886 1890 CHEMICAL Keppra 1500 twice 120 123 DRUG_DOSE seizures 270 278 DISEASE hemiparesis 280 291 DISEASE Keppra 577 583 CHEMICAL oxycodone 666 675 CHEMICAL aspirin 680 687 CHEMICAL Prilosec 695 703 CHEMICAL Dilantin 705 713 CHEMICAL oxygen 803 809 CHEMICAL hyperventilation 844 860 DISEASE dysphasia 934 943 DISEASE brain edema 1212 1223 DISEASE malignant meningioma 1236 1256 DISEASE disability 1324 1334 DISEASE tumor 1622 1627 DISEASE tumor 1680 1685 DISEASE Dilantin 300 a 148 151 DRUG_DOSE seizures 270 278 DISEASE hemiparesis 280 291 DISEASE Keppra 577 583 CHEMICAL oxycodone 666 675 CHEMICAL aspirin 680 687 CHEMICAL Prilosec 695 703 CHEMICAL Dilantin 705 713 CHEMICAL oxygen 803 809 CHEMICAL hyperventilation 844 860 DISEASE dysphasia 934 943 DISEASE brain edema 1212 1223 DISEASE malignant meningioma 1236 1256 DISEASE disability 1324 1334 DISEASE tumor 1622 1627 DISEASE tumor 1680 1685 DISEASE Losartan 50 mg 222 225 DRUG_DOSE obtundation 167 178 DISEASE septic shock 260 272 DISEASE cardiac disease 517 532 DISEASE chest pain 781 791 DISEASE shortness of breath 793 812 DISEASE hypertension 876 888 DISEASE diabetes mellitus 904 921 DISEASE cholesterol 934 945 CHEMICAL coronary artery disease 994 1017 DISEASE Losartan 1232 1240 CHEMICAL Nifedipine 1258 1268 CHEMICAL Potassium 1297 1306 CHEMICAL fibrosis 1445 1453 DISEASE prednisone 1458 1468 CHEMICAL cellulitis 1487 1497 DISEASE infection 1528 1537 DISEASE respiratory acidosis 1591 1611 DISEASE septicemia 1613 1623 DISEASE septic shock 1628 1640 DISEASE creatinine 2385 2395 CHEMICAL potassium 2401 2410 CHEMICAL bicarbonate 2419 2430 CHEMICAL pulmonary fibrosis 2600 2618 DISEASE prednisone 2623 2633 CHEMICAL oxygen 2635 2641 CHEMICAL Septicemia 2683 2693 DISEASE septic shock 2695 2707 DISEASE cellulitis of the leg.,3 2721 2745 DISEASE sepsis 2852 2858 DISEASE sepsis 3176 3182 DISEASE Nifedipine 90 mg 228 231 DRUG_DOSE obtundation 167 178 DISEASE septic shock 260 272 DISEASE cardiac disease 517 532 DISEASE chest pain 781 791 DISEASE shortness of breath 793 812 DISEASE hypertension 876 888 DISEASE diabetes mellitus 904 921 DISEASE cholesterol 934 945 CHEMICAL coronary artery disease 994 1017 DISEASE Losartan 1232 1240 CHEMICAL Nifedipine 1258 1268 CHEMICAL Potassium 1297 1306 CHEMICAL fibrosis 1445 1453 DISEASE prednisone 1458 1468 CHEMICAL cellulitis 1487 1497 DISEASE infection 1528 1537 DISEASE respiratory acidosis 1591 1611 DISEASE septicemia 1613 1623 DISEASE septic shock 1628 1640 DISEASE creatinine 2385 2395 CHEMICAL potassium 2401 2410 CHEMICAL bicarbonate 2419 2430 CHEMICAL pulmonary fibrosis 2600 2618 DISEASE prednisone 2623 2633 CHEMICAL oxygen 2635 2641 CHEMICAL Septicemia 2683 2693 DISEASE septic shock 2695 2707 DISEASE cellulitis of the leg.,3 2721 2745 DISEASE sepsis 2852 2858 DISEASE sepsis 3176 3182 DISEASE creatinine 8.7, 435 438 DRUG_DOSE obtundation 167 178 DISEASE septic shock 260 272 DISEASE cardiac disease 517 532 DISEASE chest pain 781 791 DISEASE shortness of breath 793 812 DISEASE hypertension 876 888 DISEASE diabetes mellitus 904 921 DISEASE cholesterol 934 945 CHEMICAL coronary artery disease 994 1017 DISEASE Losartan 1232 1240 CHEMICAL Nifedipine 1258 1268 CHEMICAL Potassium 1297 1306 CHEMICAL fibrosis 1445 1453 DISEASE prednisone 1458 1468 CHEMICAL cellulitis 1487 1497 DISEASE infection 1528 1537 DISEASE respiratory acidosis 1591 1611 DISEASE septicemia 1613 1623 DISEASE septic shock 1628 1640 DISEASE creatinine 2385 2395 CHEMICAL potassium 2401 2410 CHEMICAL bicarbonate 2419 2430 CHEMICAL pulmonary fibrosis 2600 2618 DISEASE prednisone 2623 2633 CHEMICAL oxygen 2635 2641 CHEMICAL Septicemia 2683 2693 DISEASE septic shock 2695 2707 DISEASE cellulitis of the leg.,3 2721 2745 DISEASE sepsis 2852 2858 DISEASE sepsis 3176 3182 DISEASE potassium 7.3, 438 441 DRUG_DOSE obtundation 167 178 DISEASE septic shock 260 272 DISEASE cardiac disease 517 532 DISEASE chest pain 781 791 DISEASE shortness of breath 793 812 DISEASE hypertension 876 888 DISEASE diabetes mellitus 904 921 DISEASE cholesterol 934 945 CHEMICAL coronary artery disease 994 1017 DISEASE Losartan 1232 1240 CHEMICAL Nifedipine 1258 1268 CHEMICAL Potassium 1297 1306 CHEMICAL fibrosis 1445 1453 DISEASE prednisone 1458 1468 CHEMICAL cellulitis 1487 1497 DISEASE infection 1528 1537 DISEASE respiratory acidosis 1591 1611 DISEASE septicemia 1613 1623 DISEASE septic shock 1628 1640 DISEASE creatinine 2385 2395 CHEMICAL potassium 2401 2410 CHEMICAL bicarbonate 2419 2430 CHEMICAL pulmonary fibrosis 2600 2618 DISEASE prednisone 2623 2633 CHEMICAL oxygen 2635 2641 CHEMICAL Septicemia 2683 2693 DISEASE septic shock 2695 2707 DISEASE cellulitis of the leg.,3 2721 2745 DISEASE sepsis 2852 2858 DISEASE sepsis 3176 3182 DISEASE Lortab 7.5 to 465 468 DRUG_DOSE pain 414 418 DISEASE pain 633 637 DISEASE pain 653 657 DISEASE pain 749 753 DISEASE pain 761 765 DISEASE pain 2078 2082 DISEASE Lortab 2103 2109 CHEMICAL pain 2250 2254 DISEASE lidocaine 2% 293 296 DRUG_DOSE left mandibular vestibular abscess 340 374 DISEASE swelling 764 772 DISEASE carious teeth 789 802 DISEASE abscessed 819 828 DISEASE carious teeth 952 965 DISEASE throat 1317 1323 DISEASE lidocaine 1422 1431 CHEMICAL 1:100,000 epinephrine 1440 1461 CHEMICAL Marcaine 1477 1485 CHEMICAL 1:200,000 epinephrine 1496 1517 CHEMICAL throat 2417 2423 DISEASE Marcaine 0.5% 304 307 DRUG_DOSE left mandibular vestibular abscess 340 374 DISEASE swelling 764 772 DISEASE carious teeth 789 802 DISEASE abscessed 819 828 DISEASE carious teeth 952 965 DISEASE throat 1317 1323 DISEASE lidocaine 1422 1431 CHEMICAL 1:100,000 epinephrine 1440 1461 CHEMICAL Marcaine 1477 1485 CHEMICAL 1:200,000 epinephrine 1496 1517 CHEMICAL throat 2417 2423 DISEASE Nexium 40 mg 302 305 DRUG_DOSE erosions 349 357 DISEASE bleeding 873 881 DISEASE varix 1055 1060 DISEASE bleeding 1502 1510 DISEASE Nexium 1575 1581 CHEMICAL
Second Part
First Phase (First Paragraph)
In [14]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import string
import re
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.manifold import TSNE
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
from imblearn.over_sampling import SMOTE
In [15]:
# A method to get unique words(vocabulary) and sentence count in a list of text
def get_sentence_word_count(text_list):
sent_count = 0
word_count = 0
vocab = {}
for text in text_list:
sentences=sent_tokenize(str(text).lower())
sent_count = sent_count + len(sentences)
for sentence in sentences:
words=word_tokenize(sentence)
for word in words:
if(word in vocab.keys()):
vocab[word] = vocab[word] +1
else:
vocab[word] =1
word_count = len(vocab.keys())
return sent_count,word_count
In [16]:
clinical_text_df = pd.read_csv("mtsamples.csv")
print(clinical_text_df.columns)
clinical_text_df.head(5)
Index(['Unnamed: 0', 'description', 'medical_specialty', 'sample_name',
'transcription', 'keywords'],
dtype='object')
Out[16]:
| Unnamed: 0 | description | medical_specialty | sample_name | transcription | keywords | |
|---|---|---|---|---|---|---|
| 0 | 0 | A 23-year-old white female presents with comp... | Allergy / Immunology | Allergic Rhinitis | SUBJECTIVE:, This 23-year-old white female pr... | allergy / immunology, allergic rhinitis, aller... |
| 1 | 1 | Consult for laparoscopic gastric bypass. | Bariatrics | Laparoscopic Gastric Bypass Consult - 2 | PAST MEDICAL HISTORY:, He has difficulty climb... | bariatrics, laparoscopic gastric bypass, weigh... |
| 2 | 2 | Consult for laparoscopic gastric bypass. | Bariatrics | Laparoscopic Gastric Bypass Consult - 1 | HISTORY OF PRESENT ILLNESS: , I have seen ABC ... | bariatrics, laparoscopic gastric bypass, heart... |
| 3 | 3 | 2-D M-Mode. Doppler. | Cardiovascular / Pulmonary | 2-D Echocardiogram - 1 | 2-D M-MODE: , ,1. Left atrial enlargement wit... | cardiovascular / pulmonary, 2-d m-mode, dopple... |
| 4 | 4 | 2-D Echocardiogram | Cardiovascular / Pulmonary | 2-D Echocardiogram - 2 | 1. The left ventricular cavity size and wall ... | cardiovascular / pulmonary, 2-d, doppler, echo... |
In [17]:
clinical_text_df = clinical_text_df[clinical_text_df['transcription'].notna()]
sent_count,word_count= get_sentence_word_count(clinical_text_df['transcription'].tolist())
print("Number of sentences in transcriptions column: "+ str(sent_count))
print("Number of unique words in transcriptions column: "+str(word_count))
data_categories = clinical_text_df.groupby(clinical_text_df['medical_specialty'])
i = 1
print('===========Original Categories =======================')
for catName,dataCategory in data_categories:
print('Cat:'+str(i)+' '+catName + ' : '+ str(len(dataCategory)) )
i = i+1
print('==================================')
Number of sentences in transcriptions column: 140235 Number of unique words in transcriptions column: 35805 ===========Original Categories ======================= Cat:1 Allergy / Immunology : 7 Cat:2 Autopsy : 8 Cat:3 Bariatrics : 18 Cat:4 Cardiovascular / Pulmonary : 371 Cat:5 Chiropractic : 14 Cat:6 Consult - History and Phy. : 516 Cat:7 Cosmetic / Plastic Surgery : 27 Cat:8 Dentistry : 27 Cat:9 Dermatology : 29 Cat:10 Diets and Nutritions : 10 Cat:11 Discharge Summary : 108 Cat:12 ENT - Otolaryngology : 96 Cat:13 Emergency Room Reports : 75 Cat:14 Endocrinology : 19 Cat:15 Gastroenterology : 224 Cat:16 General Medicine : 259 Cat:17 Hematology - Oncology : 90 Cat:18 Hospice - Palliative Care : 6 Cat:19 IME-QME-Work Comp etc. : 16 Cat:20 Lab Medicine - Pathology : 8 Cat:21 Letters : 23 Cat:22 Nephrology : 81 Cat:23 Neurology : 223 Cat:24 Neurosurgery : 94 Cat:25 Obstetrics / Gynecology : 155 Cat:26 Office Notes : 50 Cat:27 Ophthalmology : 83 Cat:28 Orthopedic : 355 Cat:29 Pain Management : 61 Cat:30 Pediatrics - Neonatal : 70 Cat:31 Physical Medicine - Rehab : 21 Cat:32 Podiatry : 47 Cat:33 Psychiatry / Psychology : 53 Cat:34 Radiology : 273 Cat:35 Rheumatology : 10 Cat:36 SOAP / Chart / Progress Notes : 166 Cat:37 Sleep Medicine : 20 Cat:38 Speech - Language : 9 Cat:39 Surgery : 1088 Cat:40 Urology : 156 ==================================
We removed the transcriptions having category labels less than 50
In [18]:
filtered_data_categories = data_categories.filter(lambda x:x.shape[0] > 50)
final_data_categories = filtered_data_categories.groupby(filtered_data_categories['medical_specialty'])
i=1
print('============Reduced Categories ======================')
for catName,dataCategory in final_data_categories:
print('Cat:'+str(i)+' '+catName + ' : '+ str(len(dataCategory)) )
i = i+1
print('============ Reduced Categories ======================')
============Reduced Categories ====================== Cat:1 Cardiovascular / Pulmonary : 371 Cat:2 Consult - History and Phy. : 516 Cat:3 Discharge Summary : 108 Cat:4 ENT - Otolaryngology : 96 Cat:5 Emergency Room Reports : 75 Cat:6 Gastroenterology : 224 Cat:7 General Medicine : 259 Cat:8 Hematology - Oncology : 90 Cat:9 Nephrology : 81 Cat:10 Neurology : 223 Cat:11 Neurosurgery : 94 Cat:12 Obstetrics / Gynecology : 155 Cat:13 Ophthalmology : 83 Cat:14 Orthopedic : 355 Cat:15 Pain Management : 61 Cat:16 Pediatrics - Neonatal : 70 Cat:17 Psychiatry / Psychology : 53 Cat:18 Radiology : 273 Cat:19 SOAP / Chart / Progress Notes : 166 Cat:20 Surgery : 1088 Cat:21 Urology : 156 ============ Reduced Categories ======================
Plot the categories
In [19]:
plt.figure(figsize=(10,10))
sns.countplot(y='medical_specialty', data = filtered_data_categories )
plt.show()
In [20]:
# We are interested only in the 'transcription' and 'medical_specialty' columns in the dataset
data = filtered_data_categories[['transcription', 'medical_specialty']]
data = data.drop(data[data['transcription'].isna()].index)
data.shape
Out[20]:
(4597, 2)
In [21]:
print('Sample Transcription 1:'+data.iloc[5]['transcription']+'\n')
print('Sample Transcription 2:'+data.iloc[125]['transcription']+'\n')
print('Sample Transcription 3:'+data.iloc[1000]['transcription'])
Sample Transcription 1:CC:, Confusion and slurred speech.,HX , (primarily obtained from boyfriend): This 31 y/o RHF experienced a "flu-like illness 6-8 weeks prior to presentation. 3-4 weeks prior to presentation, she was found "passed out" in bed, and when awoken appeared confused, and lethargic. She apparently recovered within 24 hours. For two weeks prior to presentation she demonstrated emotional lability, uncharacteristic of her ( outbursts of anger and inappropriate laughter). She left a stove on.,She began slurring her speech 2 days prior to admission. On the day of presentation she developed right facial weakness and began stumbling to the right. She denied any associated headache, nausea, vomiting, fever, chills, neck stiffness or visual change. There was no history of illicit drug/ETOH use or head trauma.,PMH:, Migraine Headache.,FHX: , Unremarkable.,SHX: ,Divorced. Lives with boyfriend. 3 children alive and well. Denied tobacco/illicit drug use. Rarely consumes ETOH.,ROS:, Irregular menses.,EXAM: ,BP118/66. HR83. RR 20. T36.8C.,MS: Alert and oriented to name only. Perseverative thought processes. Utilized only one or two word answers/phrases. Non-fluent. Rarely followed commands. Impaired writing of name.,CN: Flattened right nasolabial fold only.,Motor: Mild weakness in RUE manifested by pronator drift. Other extremities were full strength.,Sensory: withdrew to noxious stimulation in all 4 extremities.,Coordination: difficult to assess.,Station: Right pronator drift.,Gait: unremarkable.,Reflexes: 2/2BUE, 3/3BLE, Plantars were flexor bilaterally.,General Exam: unremarkable.,INITIAL STUDIES:, CBC, GS, UA, PT, PTT, ESR, CRP, EKG were all unremarkable. Outside HCT showed hypodensities in the right putamen, left caudate, and at several subcortical locations (not specified).,COURSE: ,MRI Brian Scan, 2/11/92 revealed an old lacunar infarct in the right basal ganglia, edema within the head of the left caudate nucleus suggesting an acute ischemic event, and arterial enhancement of the left MCA distribution suggesting slow flow. The latter suggested a vasculopathy such as Moya Moya, or fibromuscular dysplasia. HIV, ANA, Anti-cardiolipin Antibody titer, Cardiac enzymes, TFTs, B12, and cholesterol studies were unremarkable.,She underwent a cerebral angiogram on 2/12/92. This revealed an occlusion of the left MCA just distal to its origin. The distal distribution of the left MCA filled on later films through collaterals from the left ACA. There was also an occlusion of the right MCA just distal to the temporal branch. Distal branches of the right MCA filled through collaterals from the right ACA. No other vascular abnormalities were noted. These findings were felt to be atypical but nevertheless suspicious of a large caliber vasculitis such as Moya Moya disease. She was subsequently given this diagnosis. Neuropsychologic testing revealed widespread cognitive dysfunction with particular impairment of language function. She had long latencies responding and understood only simple questions. Affect was blunted and there was distinct lack of concern regarding her condition. She was subsequently discharged home on no medications.,In 9/92 she was admitted for sudden onset right hemiparesis and mental status change. Exam revealed the hemiparesis and in addition she was found to have significant neck lymphadenopathy. OB/GYN exam including cervical biopsy, and abdominal/pelvic CT scanning revealed stage IV squamous cell cancer of the cervix. She died 9/24/92 of cervical cancer. Sample Transcription 2:ADMITTING DIAGNOSES:,1. Hematuria.,2. Benign prostatic hyperplasia.,3. Osteoarthritis.,DISCHARGE DIAGNOSES:,1. Hematuria, resolved.,2. Benign prostatic hyperplasia.,3. Complex renal cyst versus renal cell carcinoma or other tumor.,4. Osteoarthritis.,HOSPITAL COURSE:, This is a 77-year-old African-American male who was previously well until he began having gross hematuria and clots passing through his urethra on the day of admission. He stated that he never had blood in his urine before, however, he does have a past history of BPH and he had a transurethral resection of prostate more than 10 years ago. He was admitted to a regular bed. Dr. G of Urology was consulted for evaluation of his hematuria. During the workup for this, he had a CT of the abdomen and pelvis with and without contrast with early and late-phase imaging for evaluation of the kidneys and collecting system. At that time, he was shown to have multiple bilateral renal cysts with one that did not meet classification as a simple cyst and ultrasound was recommended.,He had an ultrasound done of the cyst which showed a 2.1 x 2.7 cm mass arising from the right kidney which, again, did not fit ultrasound criteria for a simple cyst and they recommended further evaluation by an MRI as this could be a hemorrhagic cyst or a solid mass or tumor, so an MRI was scheduled on the day of discharge for further evaluation of this. The report was not back at discharge. The patient had a cystoscopy and transurethral resection of prostate as well with entire resection of the prostate gland. Pathology on this specimen showed multiple portions of prostatic tissue which was primarily fibromuscular, and he was diagnosed with nonprostatic hyperplasia. His urine slowly cleared. He tolerated a regular diet with no difficulties in his activities of daily living, and his Foley was removed on the day of discharge.,He was started on ciprofloxacin, Colace, and Lasix after the transurethral resection and continued these for a short course. He is asked to continue the Colace as an outpatient for stool softening for comfort.,DISCHARGE MEDICATIONS:, Colace 100 mg 1 b.i.d.,DISCHARGE FOLLOWUP PLANNING:, The patient is to follow up with his primary care physician at ABCD, Dr. B or Dr. J, the patient is unsure of which, in the next couple weeks. He is to follow up with Dr. G of Urology in the next week by phone in regards to the patient's MRI and plans for a laparoscopic partial renal resection biopsy. This is scheduled for the week after discharge potentially by Dr. G, and the patient will discuss the exact time later this week. The patient is to return to the emergency room or to our clinic if he has worsening hematuria again or no urine output. Sample Transcription 3:PREOPERATIVE DIAGNOSES: , Phimosis and adhesions.,POSTOPERATIVE DIAGNOSES: ,Phimosis and adhesions.,PROCEDURES PERFORMED: , Circumcision and release of ventral chordee.,ANESTHESIA: ,Local MAC.,ESTIMATED BLOOD LOSS: , Minimal.,FLUIDS: , Crystalloid. The patient was given antibiotics preop.,BRIEF HISTORY: , This is a 43-year-old male who presented to us with significant phimosis, difficulty retracting the foreskin. The patient had buried penis with significant obesity issues in the suprapubic area. Options such as watchful waiting, continuation of slowly retracting the skin, applying betamethasone cream, and circumcision were discussed. Risk of anesthesia, bleeding, infection, pain, MI, DVT, PE, and CVA risks were discussed. The patient had discussed this issue with Dr Khan and had been approved to get off of the Plavix. Consent had been obtained. Risk of scarring, decrease in penile sensation, and unexpected complications were discussed. The patient was told about removing the dressing tomorrow morning, okay to shower after 48 hours, etc. Consent was obtained.,DESCRIPTION OF PROCEDURE: ,The patient was brought to the OR. Anesthesia was applied. The patient was placed in supine position. The patient was prepped and draped in usual sterile fashion. Local MAC anesthesia was applied. After draping, 17 mL of mixture of 0.25% Marcaine and 1% lidocaine plain were applied around the dorsal aspect of the penis for dorsal block. The patient had significant phimosis and slight ventral chordee. Using marking pen, the excess foreskin was marked off. Using a knife, the ventral chordee was released. The urethra was intact. The excess foreskin was removed. Hemostasis was obtained using electrocautery. A 5-0 Monocryl stitches were used for 4 interrupted stitches and horizontal mattresses were done. The patient tolerated the procedure well. There was excellent hemostasis. The penis was straight. Vaseline gauze and Kerlix were applied. The patient was brought to the recovery in stable condition. Plan was for removal of the dressing tomorrow. Okay to shower after 48 hours.
Cleaning the data
In [22]:
def clean_text(text ):
text = text.translate(str.maketrans('', '', string.punctuation))
text1 = ''.join([w for w in text if not w.isdigit()])
REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
#BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
text2 = text1.lower()
text2 = REPLACE_BY_SPACE_RE.sub('', text2) # replace REPLACE_BY_SPACE_RE symbols by space in text
#text2 = BAD_SYMBOLS_RE.sub('', text2)
return text2
def lemmatize_text(text):
wordlist=[]
lemmatizer = WordNetLemmatizer()
sentences=sent_tokenize(text)
intial_sentences= sentences[0:1]
final_sentences = sentences[len(sentences)-2: len(sentences)-1]
for sentence in intial_sentences:
words=word_tokenize(sentence)
for word in words:
wordlist.append(lemmatizer.lemmatize(word))
for sentence in final_sentences:
words=word_tokenize(sentence)
for word in words:
wordlist.append(lemmatizer.lemmatize(word))
return ' '.join(wordlist)
In [23]:
data['transcription'] = data['transcription'].apply(lemmatize_text)
data['transcription'] = data['transcription'].apply(clean_text)
In [24]:
print('Sample Transcription 1:'+data.iloc[5]['transcription']+'\n')
print('Sample Transcription 2:'+data.iloc[125]['transcription']+'\n')
print('Sample Transcription 3:'+data.iloc[1000]['transcription'])
Sample Transcription 1:cc confusion and slurred speech hx primarily obtained from boyfriend this yo rhf experienced a flulike illness week prior to presentation obgyn exam including cervical biopsy and abdominalpelvic ct scanning revealed stage iv squamous cell cancer of the cervix Sample Transcription 2:admitting diagnoses this is scheduled for the week after discharge potentially by dr g and the patient will discus the exact time later this week Sample Transcription 3:preoperative diagnoses phimosis and adhesions postoperative diagnoses phimosis and adhesions procedures performed circumcision and release of ventral chordee anesthesia local mac estimated blood loss minimal fluids crystalloid plan wa for removal of the dressing tomorrow
Feature Extraction (Converting text data into a numerical form)
TF-IDF:
In [25]:
tfidfVectorizer = TfidfVectorizer(analyzer='word', stop_words='english',ngram_range=(1,3), max_df=0.75, use_idf=True, smooth_idf=True, max_features=1000)
tfIdfMat = tfidfVectorizer.fit_transform(data['transcription'].tolist())
tfidf_feature_names = sorted(tfidfVectorizer.get_feature_names_out())
print(tfidf_feature_names)
['abc', 'abcd', 'abdomen', 'abdomen pelvis', 'abdominal', 'abdominal pain', 'abnormal', 'abscess', 'activity', 'acute', 'additional', 'adenocarcinoma', 'adequate', 'administered', 'admission', 'admitted', 'admitting', 'africanamerican', 'age', 'ago', 'airway', 'alcohol', 'anemia', 'anesthesia', 'anesthesia care', 'anesthesia general', 'anesthesia general endotracheal', 'anesthesia local', 'anesthetic', 'angina', 'angiography', 'ankle', 'anterior', 'antibiotic', 'aortic', 'apnea', 'apparent', 'appendicitis', 'applied', 'appointment', 'appropriate', 'approximately', 'area', 'arm', 'artery', 'artery disease', 'arthritis', 'arthroplasty', 'asked', 'aspect', 'aspiration', 'assessment', 'associated', 'atrial', 'atrial fibrillation', 'awakened', 'axial', 'axial ct', 'axial ct image', 'axis', 'baby', 'base', 'benefit', 'benign', 'better', 'bid', 'bilateral', 'bilaterally', 'biopsy', 'bladder', 'bleeding', 'block', 'blood', 'blood loss', 'blood loss cc', 'blood loss minimal', 'blood loss ml', 'blood loss wa', 'blood pressure', 'body', 'bone', 'bowel', 'boy', 'brain', 'breast', 'breath', 'breathing', 'brief', 'brief history', 'brief history patient', 'bronchoscopy', 'brought', 'brought operating', 'brought operating room', 'bunion', 'bypass', 'cancer', 'carcinoma', 'cardiac', 'care', 'care unit', 'carotid', 'carpal', 'carpal tunnel', 'carpal tunnel syndrome', 'case', 'cataract', 'cataract right', 'cataract right eye', 'catheter', 'catheterization', 'caucasian', 'caucasian female', 'caucasian male', 'cc', 'cc cc', 'cell', 'cell carcinoma', 'central', 'cervical', 'cervical spine', 'chamber', 'change', 'check', 'chest', 'chest pain', 'chief', 'chief complaint', 'child', 'cholecystectomy', 'cholecystitis', 'cholelithiasis', 'chronic', 'circumcision', 'clear', 'clinic', 'clinical', 'closed', 'closed vicryl', 'closure', 'cm', 'colon', 'colonoscopy', 'come', 'common', 'complaining', 'complaint', 'complete', 'completed', 'complex', 'complication', 'complications', 'complications estimated', 'complications estimated blood', 'compression', 'concern', 'condition', 'consent', 'consent wa', 'consent wa obtained', 'consistent', 'consult', 'consultation', 'continue', 'continued', 'contrast', 'contrast reason', 'contrast reason exam', 'control', 'cord', 'coronal', 'coronary', 'coronary artery', 'coronary artery disease', 'correct', 'cough', 'count', 'count correct', 'course', 'crystalloid', 'ct', 'ct abdomen', 'ct image', 'ct scan', 'current', 'cyst', 'cystoscopy', 'daily', 'data', 'date', 'day', 'days', 'deep', 'defect', 'deformity', 'degenerative', 'delivery', 'denies', 'dental', 'department', 'depression', 'descending', 'description', 'description procedure', 'description procedure patient', 'developed', 'diabetes', 'diabetes mellitus', 'diagnosed', 'diagnoses', 'diagnoses patient', 'diagnoses patient wa', 'diagnoses sponge', 'diagnoses wa', 'diagnosis', 'diagnosis acute', 'diagnosis bilateral', 'diagnosis cataract', 'diagnosis cervical', 'diagnosis chronic', 'diagnosis es', 'diagnosis left', 'diagnosis recurrent', 'diagnosis right', 'diagnostic', 'diameter', 'diarrhea', 'did', 'difficulty', 'direct', 'disc', 'discectomy', 'discharge', 'discharge diagnoses', 'discharge diagnosis', 'discharged', 'discomfort', 'discussed', 'discussed patient', 'disease', 'disk', 'disorder', 'disposition', 'disposition patient', 'distal', 'distress', 'doe', 'doing', 'dorsal', 'dr', 'drain', 'drainage', 'drains', 'draped', 'draped sterile', 'draped usual', 'dressing', 'dressing applied', 'dressing wa', 'dressing wa applied', 'drop', 'dysphagia', 'ear', 'echocardiogram', 'edema', 'effusion', 'ekg', 'elbow', 'elevated', 'emergency', 'emergency department', 'emergency room', 'end', 'end procedure', 'endoscopy', 'endotracheal', 'endotracheal anesthesia', 'endotracheal tube', 'endstage', 'endstage renal', 'endstage renal disease', 'epidural', 'episode', 'es', 'esophageal', 'estimated', 'estimated blood', 'estimated blood loss', 'evaluate', 'evaluation', 'evidence', 'exam', 'exam ct', 'exam mri', 'examination', 'excision', 'exercise', 'explained', 'explained patient', 'external', 'extraction', 'extremity', 'extubated', 'eye', 'eye postoperative', 'eye postoperative diagnosis', 'eye procedure', 'face', 'failure', 'fall', 'family', 'family history', 'fascia', 'fashion', 'felt', 'female', 'female history', 'female present', 'female wa', 'femoral', 'fetal', 'fever', 'fibrillation', 'final', 'finding', 'findings', 'findings patient', 'finger', 'fixation', 'flow', 'fluid', 'fluids', 'foley', 'follow', 'followed', 'following', 'followup', 'foot', 'foramen', 'foreign', 'foreign body', 'fracture', 'free', 'french', 'frontal', 'function', 'fusion', 'general', 'general anesthesia', 'general endotracheal', 'general endotracheal anesthesia', 'gentleman', 'gi', 'given', 'going', 'good', 'good condition', 'grade', 'graft', 'greater', 'gross', 'ha', 'ha history', 'hand', 'hardware', 'having', 'head', 'headache', 'health', 'hearing', 'heart', 'heart rate', 'hematoma', 'hemorrhage', 'hemostasis', 'hernia', 'herniated', 'herniated nucleus', 'herniated nucleus pulposus', 'high', 'hip', 'history', 'history patient', 'history patient yearold', 'history present', 'history present illness', 'history yearold', 'home', 'hospital', 'hour', 'hx', 'hx yo', 'hx yo rhf', 'hx yo rhm', 'hypertension', 'hypertrophy', 'identified', 'ii', 'illness', 'illness patient', 'illness patient pleasant', 'illness patient yearold', 'illness yearold', 'illness yearold female', 'illness yearold male', 'image', 'imaging', 'implantation', 'impression', 'incision', 'incision wa', 'including', 'increased', 'indication', 'indication surgery', 'indications', 'indications patient', 'indications patient yearold', 'indications procedure', 'indications procedure patient', 'indications surgery', 'infection', 'inferior', 'informed', 'informed consent', 'informed consent wa', 'inguinal', 'inguinal hernia', 'initially', 'injected', 'injection', 'injury', 'insertion', 'instructed', 'instruction', 'instrument', 'intact', 'internal', 'interpretation', 'interrupted', 'intervention', 'intraocular', 'intraocular lens', 'intraoperative', 'intravenous', 'irrigated', 'iv', 'joint', 'kidney', 'knee', 'known', 'laceration', 'lady', 'lap', 'laparoscopic', 'large', 'lateral', 'layer', 'left', 'left breast', 'left foot', 'left hip', 'left knee', 'left lower', 'left lower extremity', 'left shoulder', 'left upper', 'leg', 'length', 'lens', 'lesion', 'level', 'lidocaine', 'ligament', 'likely', 'liver', 'll', 'lobe', 'local', 'loss', 'loss cc', 'loss minimal', 'loss ml', 'loss wa', 'low', 'low pain', 'lower', 'lower extremity', 'lower quadrant', 'ls', 'lumbar', 'lung', 'lymph', 'lymph node', 'mac', 'male', 'male present', 'male wa', 'man', 'management', 'manner', 'marcaine', 'mass', 'medial', 'medical', 'medical history', 'medication', 'medications', 'medium', 'mellitus', 'membrane', 'mental', 'metastatic', 'mg', 'middle', 'mild', 'minimal', 'minute', 'minutes', 'ml', 'mm', 'moderate', 'mom', 'monitored', 'monocryl', 'month', 'monthold', 'morning', 'mother', 'motor', 'mouth', 'mr', 'mri', 'ms', 'multiple', 'muscle', 'myocardial', 'nasal', 'nausea', 'nausea vomiting', 'neck', 'need', 'needed', 'needle', 'needle count', 'needle count correct', 'negative', 'nerve', 'neurologic', 'new', 'night', 'node', 'noncontrast', 'normal', 'nose', 'note', 'noted', 'nuclear', 'nucleus', 'nucleus pulposus', 'numbness', 'obstruction', 'obstructive', 'obtained', 'obtained patient', 'obtained patient wa', 'obtaining', 'office', 'old', 'onset', 'open', 'operating', 'operating room', 'operating room placed', 'operating table', 'operation', 'operation patient', 'operation performed', 'operations', 'operative', 'operative procedure', 'oral', 'osteoarthritis', 'otitis', 'outpatient', 'ox', 'pacemaker', 'pacu', 'pain', 'pain history', 'pain history present', 'parent', 'partial', 'past', 'past medical', 'past medical history', 'patent', 'pathology', 'patient', 'patient ha', 'patient monthold', 'patient pleasant', 'patient pleasant yearold', 'patient present', 'patient tolerated', 'patient tolerated procedure', 'patient wa', 'patient wa brought', 'patient wa extubated', 'patient wa placed', 'patient wa taken', 'patient yearold', 'patient yearold africanamerican', 'patient yearold caucasian', 'patient yearold female', 'patient yearold gentleman', 'patient yearold male', 'patient yearold white', 'patient yearold woman', 'pelvic', 'pelvis', 'percutaneous', 'performed', 'performed patient', 'perfusion', 'period', 'persistent', 'phacoemulsification', 'physical', 'physical examination', 'physician', 'place', 'placed', 'placed supine', 'placed supine position', 'placement', 'plan', 'plate', 'pleasant', 'pleasant yearold', 'pleural', 'pleural effusion', 'pneumonia', 'point', 'polyp', 'position', 'positive', 'possible', 'post', 'posterior', 'postoperative', 'postoperative diagnoses', 'postoperative diagnosis', 'postoperative diagnosis left', 'postoperative diagnosis right', 'postprocedure', 'pregnancy', 'preoperative', 'preoperative diagnoses', 'preoperative diagnoses patient', 'preoperative diagnoses sponge', 'preoperative diagnosis', 'preoperative diagnosis acute', 'preoperative diagnosis bilateral', 'preoperative diagnosis left', 'preoperative diagnosis right', 'prepped', 'prepped draped', 'prepped draped usual', 'preprocedure', 'present', 'present illness', 'present illness patient', 'present illness yearold', 'present today', 'presentation', 'presented', 'presented emergency', 'presented emergency room', 'pressure', 'previous', 'previously', 'primary', 'prior', 'prn', 'problem', 'procedure', 'procedure informed', 'procedure informed consent', 'procedure left', 'procedure patient', 'procedure patient wa', 'procedure patient yearold', 'procedure performed', 'procedure right', 'procedure wa', 'procedure yearold', 'procedures', 'procedures performed', 'progressive', 'prostate', 'prostate cancer', 'protocol', 'proximal', 'pulmonary', 'pulposus', 'quadrant', 'question', 'radial', 'radiation', 'radiculopathy', 'rate', 'reason', 'reason consult', 'reason consultation', 'reason exam', 'reason visit', 'received', 'recent', 'recently', 'recommendation', 'recommended', 'reconstruction', 'recovery', 'recovery room', 'recovery room satisfactory', 'recovery room stable', 'recovery stable', 'rectal', 'recurrent', 'reduction', 'referral', 'referred', 'reflux', 'regarding', 'region', 'related', 'release', 'removal', 'removed', 'renal', 'renal disease', 'renal mass', 'repair', 'replacement', 'report', 'resection', 'residual', 'respiratory', 'result', 'return', 'returned', 'revealed', 'review', 'reviewed', 'rhf', 'rhm', 'right', 'right breast', 'right eye', 'right eye postoperative', 'right foot', 'right inguinal', 'right inguinal hernia', 'right knee', 'right lower', 'right shoulder', 'right upper', 'righthanded', 'risk', 'risk benefit', 'room', 'room placed', 'room placed supine', 'room satisfactory', 'room satisfactory condition', 'room stable', 'room stable condition', 'routine', 'rule', 'running', 'ruptured', 'satisfactory', 'satisfactory condition', 'scan', 'scope', 'screening', 'screw', 'second', 'secondary', 'sedation', 'seen', 'seizure', 'sent', 'service', 'severe', 'shortness', 'shortness breath', 'shoulder', 'showed', 'shunt', 'sign', 'significant', 'signs', 'single', 'sinus', 'site', 'size', 'skin', 'skin wa', 'sleep', 'small', 'soft', 'soft tissue', 'solution', 'space', 'specimen', 'specimens', 'speech', 'spinal', 'spine', 'spondylosis', 'sponge', 'sponge lap', 'sponge needle', 'sponge needle count', 'squamous', 'squamous cell', 'squamous cell carcinoma', 'stable', 'stable condition', 'stage', 'standard', 'started', 'state', 'status', 'status post', 'stenosis', 'stent', 'sterile', 'sterile dressing', 'sterile dressing applied', 'sterile fashion', 'steristrips', 'stomach', 'stone', 'stress', 'stress test', 'stroke', 'study', 'subcutaneous', 'subcutaneous tissue', 'subcuticular', 'subdural', 'subdural hematoma', 'subglottic', 'subjective', 'subjective patient', 'subjective patient yearold', 'subjective yearold', 'suite', 'summary', 'superior', 'supine', 'supine operating', 'supine position', 'surgery', 'surgery patient', 'surgical', 'suture', 'swelling', 'symptom', 'symptomatic', 'syndrome', 'table', 'taken', 'taken operating', 'taken operating room', 'taken recovery', 'taken recovery room', 'tear', 'technique', 'temporal', 'tendon', 'test', 'testis', 'therapy', 'thoracic', 'thyroid', 'tibial', 'time', 'tissue', 'title', 'title operation', 'today', 'tolerated', 'tolerated procedure', 'tolerated procedure wa', 'topical', 'total', 'total knee', 'tourniquet', 'tourniquet time', 'transferred', 'transferred recovery', 'treated', 'treatment', 'tube', 'tumor', 'tunnel', 'tunnel syndrome', 'type', 'ultrasound', 'underwent', 'undescended', 'unit', 'unremarkable', 'upper', 'upper extremity', 'upper lobe', 'urinary', 'urine', 'use', 'used', 'using', 'usual', 'usual fashion', 'valve', 'vein', 'venous', 'ventricular', 'versed', 'vicryl', 'vicryl suture', 'view', 'vision', 'visit', 'visual', 'vital', 'vital signs', 'vomiting', 'wa', 'wa admitted', 'wa applied', 'wa awakened', 'wa brought', 'wa brought operating', 'wa closed', 'wa extubated', 'wa given', 'wa injected', 'wa noted', 'wa obtained', 'wa obtained patient', 'wa performed', 'wa placed', 'wa prepped', 'wa prepped draped', 'wa referred', 'wa removed', 'wa seen', 'wa taken', 'wa taken operating', 'wa taken recovery', 'wa transferred', 'wa used', 'wall', 'weakness', 'week', 'weight', 'white', 'white female', 'white male', 'woman', 'work', 'worsening', 'wound', 'wound wa', 'wrist', 'xray', 'xyz', 'year', 'year ago', 'year old', 'yearold', 'yearold africanamerican', 'yearold boy', 'yearold caucasian', 'yearold female', 'yearold female present', 'yearold gentleman', 'yearold male', 'yearold man', 'yearold white', 'yearold white female', 'yearold white male', 'yearold woman', 'yo', 'yo rhf', 'yo rhm']
Bag-of-Words (CountVectorizer):
In [26]:
# CountVectorizer (Bag-of-Words)
countVectorizer = CountVectorizer(analyzer='word', stop_words='english', ngram_range=(1, 3), max_df=0.75, max_features=1000)
bow_matrix = countVectorizer.fit_transform(data['transcription'].tolist())
bow_feature_names = sorted(countVectorizer.get_feature_names_out())
print(bow_feature_names)
['abc', 'abcd', 'abdomen', 'abdomen pelvis', 'abdominal', 'abdominal pain', 'abnormal', 'abscess', 'activity', 'acute', 'additional', 'adenocarcinoma', 'adequate', 'administered', 'admission', 'admitted', 'admitting', 'africanamerican', 'age', 'ago', 'airway', 'alcohol', 'anemia', 'anesthesia', 'anesthesia care', 'anesthesia general', 'anesthesia general endotracheal', 'anesthesia local', 'anesthetic', 'angina', 'angiography', 'ankle', 'anterior', 'antibiotic', 'aortic', 'apnea', 'apparent', 'appendicitis', 'applied', 'appointment', 'appropriate', 'approximately', 'area', 'arm', 'artery', 'artery disease', 'arthritis', 'arthroplasty', 'asked', 'aspect', 'aspiration', 'assessment', 'associated', 'atrial', 'atrial fibrillation', 'awakened', 'axial', 'axial ct', 'axial ct image', 'axis', 'baby', 'base', 'benefit', 'benign', 'better', 'bid', 'bilateral', 'bilaterally', 'biopsy', 'bladder', 'bleeding', 'block', 'blood', 'blood loss', 'blood loss cc', 'blood loss minimal', 'blood loss ml', 'blood loss wa', 'blood pressure', 'body', 'bone', 'bowel', 'boy', 'brain', 'breast', 'breath', 'breathing', 'brief', 'brief history', 'brief history patient', 'bronchoscopy', 'brought', 'brought operating', 'brought operating room', 'bunion', 'bypass', 'cancer', 'carcinoma', 'cardiac', 'care', 'care unit', 'carotid', 'carpal', 'carpal tunnel', 'carpal tunnel syndrome', 'case', 'cataract', 'cataract right', 'cataract right eye', 'catheter', 'catheterization', 'caucasian', 'caucasian female', 'caucasian male', 'cc', 'cc cc', 'cell', 'cell carcinoma', 'central', 'cervical', 'cervical spine', 'chamber', 'change', 'check', 'chest', 'chest pain', 'chief', 'chief complaint', 'child', 'cholecystectomy', 'cholecystitis', 'cholelithiasis', 'chronic', 'circumcision', 'clear', 'clinic', 'clinical', 'closed', 'closed vicryl', 'closure', 'cm', 'colon', 'colonoscopy', 'come', 'common', 'complaining', 'complaint', 'complete', 'completed', 'complex', 'complication', 'complications', 'complications estimated', 'complications estimated blood', 'compression', 'concern', 'condition', 'consent', 'consent wa', 'consent wa obtained', 'consistent', 'consult', 'consultation', 'continue', 'continued', 'contrast', 'contrast reason', 'contrast reason exam', 'control', 'cord', 'coronal', 'coronary', 'coronary artery', 'coronary artery disease', 'correct', 'cough', 'count', 'count correct', 'course', 'crystalloid', 'ct', 'ct abdomen', 'ct image', 'ct scan', 'current', 'cyst', 'cystoscopy', 'daily', 'data', 'date', 'day', 'days', 'deep', 'defect', 'deformity', 'degenerative', 'delivery', 'denies', 'dental', 'department', 'depression', 'descending', 'description', 'description procedure', 'description procedure patient', 'developed', 'diabetes', 'diabetes mellitus', 'diagnosed', 'diagnoses', 'diagnoses patient', 'diagnoses patient wa', 'diagnoses sponge', 'diagnoses wa', 'diagnosis', 'diagnosis acute', 'diagnosis bilateral', 'diagnosis cataract', 'diagnosis cervical', 'diagnosis chronic', 'diagnosis es', 'diagnosis left', 'diagnosis recurrent', 'diagnosis right', 'diagnostic', 'diameter', 'diarrhea', 'did', 'difficulty', 'direct', 'disc', 'discectomy', 'discharge', 'discharge diagnoses', 'discharge diagnosis', 'discharged', 'discomfort', 'discussed', 'discussed patient', 'disease', 'disk', 'disorder', 'disposition', 'disposition patient', 'distal', 'distress', 'doe', 'doing', 'dorsal', 'dr', 'drain', 'drainage', 'drains', 'draped', 'draped sterile', 'draped usual', 'dressing', 'dressing applied', 'dressing wa', 'dressing wa applied', 'drop', 'dysphagia', 'ear', 'echocardiogram', 'edema', 'effusion', 'ekg', 'elbow', 'elevated', 'emergency', 'emergency department', 'emergency room', 'end', 'end procedure', 'endoscopy', 'endotracheal', 'endotracheal anesthesia', 'endotracheal tube', 'endstage', 'endstage renal', 'endstage renal disease', 'epidural', 'episode', 'es', 'esophageal', 'estimated', 'estimated blood', 'estimated blood loss', 'evaluate', 'evaluation', 'evidence', 'exam', 'exam ct', 'exam mri', 'examination', 'excision', 'exercise', 'explained', 'explained patient', 'external', 'extraction', 'extremity', 'extubated', 'eye', 'eye postoperative', 'eye postoperative diagnosis', 'eye procedure', 'face', 'failure', 'fall', 'family', 'family history', 'fascia', 'fashion', 'felt', 'female', 'female history', 'female present', 'female wa', 'femoral', 'fetal', 'fever', 'fibrillation', 'final', 'finding', 'findings', 'findings patient', 'finger', 'fixation', 'flow', 'fluid', 'fluids', 'foley', 'follow', 'followed', 'following', 'followup', 'foot', 'foramen', 'foreign', 'foreign body', 'fracture', 'free', 'french', 'frontal', 'function', 'fusion', 'general', 'general anesthesia', 'general endotracheal', 'general endotracheal anesthesia', 'gentleman', 'gi', 'given', 'going', 'good', 'good condition', 'grade', 'graft', 'greater', 'gross', 'ha', 'ha history', 'hand', 'hardware', 'having', 'head', 'headache', 'health', 'hearing', 'heart', 'heart rate', 'hematoma', 'hemorrhage', 'hemostasis', 'hernia', 'herniated', 'herniated nucleus', 'herniated nucleus pulposus', 'high', 'hip', 'history', 'history patient', 'history patient yearold', 'history present', 'history present illness', 'history yearold', 'home', 'hospital', 'hour', 'hx', 'hx yo', 'hx yo rhf', 'hx yo rhm', 'hypertension', 'hypertrophy', 'identified', 'ii', 'illness', 'illness patient', 'illness patient pleasant', 'illness patient yearold', 'illness yearold', 'illness yearold female', 'illness yearold male', 'image', 'imaging', 'implantation', 'impression', 'incision', 'incision wa', 'including', 'increased', 'indication', 'indication surgery', 'indications', 'indications patient', 'indications patient yearold', 'indications procedure', 'indications procedure patient', 'indications surgery', 'infection', 'inferior', 'informed', 'informed consent', 'informed consent wa', 'inguinal', 'inguinal hernia', 'initially', 'injected', 'injection', 'injury', 'insertion', 'instructed', 'instruction', 'instrument', 'intact', 'internal', 'interpretation', 'interrupted', 'intervention', 'intraocular', 'intraocular lens', 'intraoperative', 'intravenous', 'irrigated', 'iv', 'joint', 'kidney', 'knee', 'known', 'laceration', 'lady', 'lap', 'laparoscopic', 'large', 'lateral', 'layer', 'left', 'left breast', 'left foot', 'left hip', 'left knee', 'left lower', 'left lower extremity', 'left shoulder', 'left upper', 'leg', 'length', 'lens', 'lesion', 'level', 'lidocaine', 'ligament', 'likely', 'liver', 'll', 'lobe', 'local', 'loss', 'loss cc', 'loss minimal', 'loss ml', 'loss wa', 'low', 'low pain', 'lower', 'lower extremity', 'lower quadrant', 'ls', 'lumbar', 'lung', 'lymph', 'lymph node', 'mac', 'male', 'male present', 'male wa', 'man', 'management', 'manner', 'marcaine', 'mass', 'medial', 'medical', 'medical history', 'medication', 'medications', 'medium', 'mellitus', 'membrane', 'mental', 'metastatic', 'mg', 'middle', 'mild', 'minimal', 'minute', 'minutes', 'ml', 'mm', 'moderate', 'mom', 'monitored', 'monocryl', 'month', 'monthold', 'morning', 'mother', 'motor', 'mouth', 'mr', 'mri', 'ms', 'multiple', 'muscle', 'myocardial', 'nasal', 'nausea', 'nausea vomiting', 'neck', 'need', 'needed', 'needle', 'needle count', 'needle count correct', 'negative', 'nerve', 'neurologic', 'new', 'night', 'node', 'noncontrast', 'normal', 'nose', 'note', 'noted', 'nuclear', 'nucleus', 'nucleus pulposus', 'numbness', 'obstruction', 'obstructive', 'obtained', 'obtained patient', 'obtained patient wa', 'obtaining', 'office', 'old', 'onset', 'open', 'operating', 'operating room', 'operating room placed', 'operating table', 'operation', 'operation patient', 'operation performed', 'operations', 'operative', 'operative procedure', 'oral', 'osteoarthritis', 'otitis', 'outpatient', 'ox', 'pacemaker', 'pacu', 'pain', 'pain history', 'pain history present', 'parent', 'partial', 'past', 'past medical', 'past medical history', 'patent', 'pathology', 'patient', 'patient ha', 'patient monthold', 'patient pleasant', 'patient pleasant yearold', 'patient present', 'patient tolerated', 'patient tolerated procedure', 'patient wa', 'patient wa brought', 'patient wa extubated', 'patient wa placed', 'patient wa taken', 'patient yearold', 'patient yearold africanamerican', 'patient yearold caucasian', 'patient yearold female', 'patient yearold gentleman', 'patient yearold male', 'patient yearold white', 'patient yearold woman', 'pelvic', 'pelvis', 'percutaneous', 'performed', 'performed patient', 'perfusion', 'period', 'persistent', 'phacoemulsification', 'physical', 'physical examination', 'physician', 'place', 'placed', 'placed supine', 'placed supine position', 'placement', 'plan', 'plate', 'pleasant', 'pleasant yearold', 'pleural', 'pleural effusion', 'pneumonia', 'point', 'polyp', 'position', 'positive', 'possible', 'post', 'posterior', 'postoperative', 'postoperative diagnoses', 'postoperative diagnosis', 'postoperative diagnosis left', 'postoperative diagnosis right', 'postprocedure', 'pregnancy', 'preoperative', 'preoperative diagnoses', 'preoperative diagnoses patient', 'preoperative diagnoses sponge', 'preoperative diagnosis', 'preoperative diagnosis acute', 'preoperative diagnosis bilateral', 'preoperative diagnosis left', 'preoperative diagnosis right', 'prepped', 'prepped draped', 'prepped draped usual', 'preprocedure', 'present', 'present illness', 'present illness patient', 'present illness yearold', 'present today', 'presentation', 'presented', 'presented emergency', 'presented emergency room', 'pressure', 'previous', 'previously', 'primary', 'prior', 'prn', 'problem', 'procedure', 'procedure informed', 'procedure informed consent', 'procedure left', 'procedure patient', 'procedure patient wa', 'procedure patient yearold', 'procedure performed', 'procedure right', 'procedure wa', 'procedure yearold', 'procedures', 'procedures performed', 'progressive', 'prostate', 'prostate cancer', 'protocol', 'proximal', 'pulmonary', 'pulposus', 'quadrant', 'question', 'radial', 'radiation', 'radiculopathy', 'rate', 'reason', 'reason consult', 'reason consultation', 'reason exam', 'reason visit', 'received', 'recent', 'recently', 'recommendation', 'recommended', 'reconstruction', 'recovery', 'recovery room', 'recovery room satisfactory', 'recovery room stable', 'recovery stable', 'rectal', 'recurrent', 'reduction', 'referral', 'referred', 'reflux', 'regarding', 'region', 'related', 'release', 'removal', 'removed', 'renal', 'renal disease', 'renal mass', 'repair', 'replacement', 'report', 'resection', 'residual', 'respiratory', 'result', 'return', 'returned', 'revealed', 'review', 'reviewed', 'rhf', 'rhm', 'right', 'right breast', 'right eye', 'right eye postoperative', 'right foot', 'right inguinal', 'right inguinal hernia', 'right knee', 'right lower', 'right shoulder', 'right upper', 'righthanded', 'risk', 'risk benefit', 'room', 'room placed', 'room placed supine', 'room satisfactory', 'room satisfactory condition', 'room stable', 'room stable condition', 'routine', 'rule', 'running', 'ruptured', 'satisfactory', 'satisfactory condition', 'scan', 'scope', 'screening', 'screw', 'second', 'secondary', 'sedation', 'seen', 'seizure', 'sent', 'service', 'severe', 'shortness', 'shortness breath', 'shoulder', 'showed', 'shunt', 'sign', 'significant', 'signs', 'single', 'sinus', 'site', 'size', 'skin', 'skin wa', 'sleep', 'small', 'soft', 'soft tissue', 'solution', 'space', 'specimen', 'specimens', 'speech', 'spinal', 'spine', 'spondylosis', 'sponge', 'sponge lap', 'sponge needle', 'sponge needle count', 'squamous', 'squamous cell', 'squamous cell carcinoma', 'stable', 'stable condition', 'stage', 'standard', 'started', 'state', 'status', 'status post', 'stenosis', 'stent', 'sterile', 'sterile dressing', 'sterile dressing applied', 'sterile fashion', 'steristrips', 'stomach', 'stone', 'stress', 'stress test', 'stroke', 'study', 'subcutaneous', 'subcutaneous tissue', 'subcuticular', 'subdural', 'subdural hematoma', 'subglottic', 'subjective', 'subjective patient', 'subjective patient yearold', 'subjective yearold', 'suite', 'summary', 'superior', 'supine', 'supine operating', 'supine position', 'surgery', 'surgery patient', 'surgical', 'suture', 'swelling', 'symptom', 'symptomatic', 'syndrome', 'table', 'taken', 'taken operating', 'taken operating room', 'taken recovery', 'taken recovery room', 'tear', 'technique', 'temporal', 'tendon', 'test', 'testis', 'therapy', 'thoracic', 'thyroid', 'tibial', 'time', 'tissue', 'title', 'title operation', 'today', 'tolerated', 'tolerated procedure', 'tolerated procedure wa', 'topical', 'total', 'total knee', 'tourniquet', 'tourniquet time', 'transferred', 'transferred recovery', 'treated', 'treatment', 'tube', 'tumor', 'tunnel', 'tunnel syndrome', 'type', 'ultrasound', 'underwent', 'undescended', 'unit', 'unremarkable', 'upper', 'upper extremity', 'upper lobe', 'urinary', 'urine', 'use', 'used', 'using', 'usual', 'usual fashion', 'valve', 'vein', 'venous', 'ventricular', 'versed', 'vicryl', 'vicryl suture', 'view', 'vision', 'visit', 'visual', 'vital', 'vital signs', 'vomiting', 'wa', 'wa admitted', 'wa applied', 'wa awakened', 'wa brought', 'wa brought operating', 'wa closed', 'wa extubated', 'wa given', 'wa injected', 'wa noted', 'wa obtained', 'wa obtained patient', 'wa performed', 'wa placed', 'wa prepped', 'wa prepped draped', 'wa referred', 'wa removed', 'wa seen', 'wa taken', 'wa taken operating', 'wa taken recovery', 'wa transferred', 'wa used', 'wall', 'weakness', 'week', 'weight', 'white', 'white female', 'white male', 'woman', 'work', 'worsening', 'wound', 'wound wa', 'wrist', 'xray', 'xyz', 'year', 'year ago', 'year old', 'yearold', 'yearold africanamerican', 'yearold boy', 'yearold caucasian', 'yearold female', 'yearold female present', 'yearold gentleman', 'yearold male', 'yearold man', 'yearold white', 'yearold white female', 'yearold white male', 'yearold woman', 'yo', 'yo rhf', 'yo rhm']
In [27]:
import gc
gc.collect()
tfIdfMatrix = tfIdfMat.todense()
tfIdfArray = np.asarray(tfIdfMatrix)
tfIdf_labels = data['medical_specialty'].tolist()
tsne_results_tfIdf = TSNE(n_components=2,init='random',random_state=0, perplexity=40).fit_transform(tfIdfArray)
plt.figure(figsize=(20,10))
palette = sns.hls_palette(21, l=.3, s=.9)
sns.scatterplot(
x=tsne_results_tfIdf[:,0], y=tsne_results_tfIdf[:,1],
hue=tfIdf_labels,
palette= palette,
legend="full",
alpha=0.3
)
plt.show()
In [28]:
gc.collect()
bowMatrix = bow_matrix.todense()
bowArray = np.asarray(bowMatrix)
bow_labels = data['medical_specialty'].tolist()
tsne_results_bow = TSNE(n_components=2,init='random',random_state=0, perplexity=40).fit_transform(bowArray)
plt.figure(figsize=(20,10))
palette = sns.hls_palette(21, l=.3, s=.9)
sns.scatterplot(
x=tsne_results_bow[:,0], y=tsne_results_bow[:,1],
hue=bow_labels,
palette= palette,
legend="full",
alpha=0.3
)
plt.show()
PCA (Principal Component Analysis)
Reducing the size of the TF-IDF and Bow matrix
TF-IDF ve BOW matrisinin boyutunu azaltma
In [29]:
gc.collect()
pca = PCA(n_components=0.95)
For TF-IDF:
In [30]:
tfIdfMat_reduced = pca.fit_transform(tfIdfMat.toarray())
labels_tfIdfMat = data['medical_specialty'].tolist()
category_list_tfIdf = data.medical_specialty.unique()
For BOW:
In [31]:
bow_reduced = pca.fit_transform(bow_matrix.toarray())
labels_bow = data['medical_specialty'].tolist()
category_list_bow = data.medical_specialty.unique()
Implement Algorithms & Architecture
In [32]:
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.naive_bayes import MultinomialNB
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Conv1D, LSTM, GRU, Dense, GlobalMaxPooling1D, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from sklearn.metrics import accuracy_score
With TF-IDF:
In [33]:
X_train_tfIdf, X_test_tfIdf, y_train_tfIdf, y_test_tfIdf = train_test_split(tfIdfMat_reduced, labels_tfIdfMat, stratify=labels_tfIdfMat,random_state=1)
print('Train_Set_Size:'+str(X_train_tfIdf.shape))
print('Test_Set_Size:'+str(X_test_tfIdf.shape))
Train_Set_Size:(3447, 587) Test_Set_Size:(1150, 587)
Ensemble Learning
In [34]:
def create_cnn_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_lstm_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = LSTM(50, return_sequences=True)(x)
x = LSTM(50)(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_gru_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = GRU(50, return_sequences=True)(x)
x = GRU(50)(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_ensemble_tfIdf(input_shape, vocab_size, embedding_dim, num_classes):
cnn = create_cnn_tfIdf(input_shape, vocab_size, embedding_dim)
lstm = create_lstm_tfIdf(input_shape, vocab_size, embedding_dim)
gru = create_gru_tfIdf(input_shape, vocab_size, embedding_dim)
concatenated = concatenate([cnn.output, lstm.output, gru.output])
x = Dense(100, activation='relu')(concatenated)
output = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[cnn.input, lstm.input, gru.input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
In [35]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
input_shape = X_train_tfIdf.shape[1]
num_classes = len(category_list_tfIdf)
vocab_size = len(category_list_tfIdf)
embedding_dim = 50
# Creating model
model_ensemble_tfIdf = create_ensemble_tfIdf(input_shape, vocab_size, embedding_dim, num_classes)
model_ensemble_tfIdf.summary()
# LabelEncoder:
label_encoder = LabelEncoder()
y_train_encoded_ensemble_tfIdf = label_encoder.fit_transform(y_train_tfIdf)
y_test_encoded_ensemble_tfIdf = label_encoder.transform(y_test_tfIdf)
# Train model
history_ensemble_tfIdf = model_ensemble_tfIdf.fit([X_train_tfIdf, X_train_tfIdf, X_train_tfIdf], y_train_encoded_ensemble_tfIdf, epochs=1, batch_size=32)
# Predict
y_pred_ensemble_tfIdf = model_ensemble_tfIdf.predict([X_test_tfIdf, X_test_tfIdf, X_test_tfIdf])
y_pred_ensemble_tfIdf = np.argmax(y_pred_ensemble_tfIdf, axis=1)
print(classification_report(y_test_encoded_ensemble_tfIdf, y_pred_ensemble_tfIdf, target_names=category_list_tfIdf, zero_division=1))
accuracy_ensemble_tfIdf = accuracy_score(y_test_encoded_ensemble_tfIdf, y_pred_ensemble_tfIdf)
print("Accuracy:", accuracy_ensemble_tfIdf)
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 587)] 0 []
embedding (Embedding) (None, 587, 50) 1050 ['input_1[0][0]']
input_2 (InputLayer) [(None, 587)] 0 []
input_3 (InputLayer) [(None, 587)] 0 []
conv1d (Conv1D) (None, 585, 64) 9664 ['embedding[0][0]']
embedding_1 (Embedding) (None, 587, 50) 1050 ['input_2[0][0]']
embedding_2 (Embedding) (None, 587, 50) 1050 ['input_3[0][0]']
conv1d_1 (Conv1D) (None, 583, 64) 12352 ['conv1d[0][0]']
lstm (LSTM) (None, 587, 50) 20200 ['embedding_1[0][0]']
gru (GRU) (None, 587, 50) 15300 ['embedding_2[0][0]']
global_max_pooling1d (Glob (None, 64) 0 ['conv1d_1[0][0]']
alMaxPooling1D)
lstm_1 (LSTM) (None, 50) 20200 ['lstm[0][0]']
gru_1 (GRU) (None, 50) 15300 ['gru[0][0]']
concatenate (Concatenate) (None, 164) 0 ['global_max_pooling1d[0][0]',
'lstm_1[0][0]',
'gru_1[0][0]']
dense (Dense) (None, 100) 16500 ['concatenate[0][0]']
dense_1 (Dense) (None, 21) 2121 ['dense[0][0]']
==================================================================================================
Total params: 114787 (448.39 KB)
Trainable params: 114787 (448.39 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
108/108 [==============================] - 36s 284ms/step - loss: 2.7277 - accuracy: 0.2315
36/36 [==============================] - 4s 90ms/step
precision recall f1-score support
Cardiovascular / Pulmonary 1.00 0.00 0.00 93
Neurology 1.00 0.00 0.00 129
Urology 1.00 0.00 0.00 27
General Medicine 1.00 0.00 0.00 24
Surgery 1.00 0.00 0.00 19
SOAP / Chart / Progress Notes 1.00 0.00 0.00 56
Radiology 1.00 0.00 0.00 65
Psychiatry / Psychology 1.00 0.00 0.00 22
Pediatrics - Neonatal 1.00 0.00 0.00 20
Pain Management 1.00 0.00 0.00 56
Orthopedic 1.00 0.00 0.00 24
Ophthalmology 1.00 0.00 0.00 39
Obstetrics / Gynecology 1.00 0.00 0.00 21
Neurosurgery 1.00 0.00 0.00 89
Nephrology 1.00 0.00 0.00 15
Hematology - Oncology 1.00 0.00 0.00 17
Gastroenterology 1.00 0.00 0.00 13
ENT - Otolaryngology 1.00 0.00 0.00 68
Emergency Room Reports 1.00 0.00 0.00 42
Discharge Summary 0.24 1.00 0.38 272
Consult - History and Phy. 1.00 0.00 0.00 39
accuracy 0.24 1150
macro avg 0.96 0.05 0.02 1150
weighted avg 0.82 0.24 0.09 1150
Accuracy: 0.23652173913043478
1. LogisticRegression
In [36]:
tfIdf_lr_model = LogisticRegression(penalty= 'elasticnet', solver= 'saga', l1_ratio=0.5, random_state=1).fit(X_train_tfIdf, y_train_tfIdf)
y_pred_lr_tfIdf= tfIdf_lr_model.predict(X_test_tfIdf)
print(classification_report(y_test_tfIdf, y_pred_lr_tfIdf, labels=category_list_tfIdf, zero_division=1))
accuracy_lr_tfIdf = accuracy_score(y_test_tfIdf, y_pred_lr_tfIdf)
print("Accuracy:", accuracy_lr_tfIdf)
precision recall f1-score support
Cardiovascular / Pulmonary 0.34 0.30 0.32 93
Neurology 0.40 0.21 0.28 56
Urology 0.33 0.10 0.16 39
General Medicine 0.22 0.09 0.13 65
Surgery 0.45 0.78 0.57 272
SOAP / Chart / Progress Notes 0.35 0.31 0.33 42
Radiology 0.35 0.35 0.35 68
Psychiatry / Psychology 1.00 0.00 0.00 13
Pediatrics - Neonatal 1.00 0.00 0.00 17
Pain Management 1.00 0.20 0.33 15
Orthopedic 0.44 0.26 0.33 89
Ophthalmology 0.50 0.19 0.28 21
Obstetrics / Gynecology 0.10 0.03 0.04 39
Neurosurgery 1.00 0.00 0.00 24
Nephrology 1.00 0.05 0.10 20
Hematology - Oncology 0.00 0.00 1.00 22
Gastroenterology 0.33 0.09 0.14 56
ENT - Otolaryngology 0.00 0.00 1.00 24
Emergency Room Reports 1.00 0.00 0.00 19
Discharge Summary 0.50 0.56 0.53 27
Consult - History and Phy. 0.30 0.67 0.41 129
accuracy 0.38 1150
macro avg 0.51 0.20 0.30 1150
weighted avg 0.41 0.38 0.36 1150
Accuracy: 0.38173913043478264
In [37]:
labels_cm_tfIdf_lg = category_list_tfIdf
cm = confusion_matrix(y_test_tfIdf, y_pred_lr_tfIdf)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g'); #annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_lg); ax.yaxis.set_ticklabels(labels_cm_tfIdf_lg);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
2. Random Forest
In [38]:
tfIdf_rf_model = RandomForestClassifier(random_state=1)
tfIdf_rf_model.fit(X_train_tfIdf, y_train_tfIdf)
y_pred_rf_tfIdf = tfIdf_rf_model.predict(X_test_tfIdf)
print(classification_report(y_test_tfIdf, y_pred_rf_tfIdf))
accuracy_rf_tfIdf = accuracy_score(y_test_tfIdf, y_pred_rf_tfIdf)
print("Accuracy:", accuracy_rf_tfIdf)
precision recall f1-score support
Cardiovascular / Pulmonary 0.07 0.05 0.06 93
Consult - History and Phy. 0.14 0.20 0.16 129
Discharge Summary 0.21 0.15 0.17 27
ENT - Otolaryngology 0.00 0.00 0.00 24
Emergency Room Reports 0.00 0.00 0.00 19
Gastroenterology 0.00 0.00 0.00 56
General Medicine 0.03 0.03 0.03 65
Hematology - Oncology 0.00 0.00 0.00 22
Nephrology 0.00 0.00 0.00 20
Neurology 0.00 0.00 0.00 56
Neurosurgery 0.00 0.00 0.00 24
Obstetrics / Gynecology 0.03 0.03 0.03 39
Ophthalmology 0.00 0.00 0.00 21
Orthopedic 0.00 0.00 0.00 89
Pain Management 0.25 0.13 0.17 15
Pediatrics - Neonatal 0.00 0.00 0.00 17
Psychiatry / Psychology 0.00 0.00 0.00 13
Radiology 0.11 0.12 0.12 68
SOAP / Chart / Progress Notes 0.05 0.05 0.05 42
Surgery 0.22 0.28 0.24 272
Urology 0.00 0.00 0.00 39
accuracy 0.11 1150
macro avg 0.05 0.05 0.05 1150
weighted avg 0.09 0.11 0.10 1150
Accuracy: 0.10956521739130434
In [39]:
labels_cm_tfIdf_rf = category_list_tfIdf
cm = confusion_matrix(y_test_tfIdf, y_pred_rf_tfIdf)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_rf); ax.yaxis.set_ticklabels(labels_cm_tfIdf_rf);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
3. XGBoost:
In [40]:
from sklearn.preprocessing import LabelEncoder
tfIdf_xgb_model = xgb.XGBClassifier(random_state=1)
label_encoder = LabelEncoder()
# Sınıf etiketlerini sayısal değerlere dönüştürme
y_train_encoded = label_encoder.fit_transform(y_train_tfIdf)
tfIdf_xgb_model.fit(X_train_tfIdf, y_train_encoded)
y_test_encoded = label_encoder.transform(y_test_tfIdf)
y_pred_xgb_tfIdf = tfIdf_xgb_model.predict(X_test_tfIdf)
print(classification_report(y_test_encoded, y_pred_xgb_tfIdf))
accuracy_xgb_tfIdf = accuracy_score(y_test_encoded, y_pred_xgb_tfIdf)
print("Accuracy:", accuracy_xgb_tfIdf)
precision recall f1-score support
0 0.12 0.10 0.11 93
1 0.14 0.18 0.16 129
2 0.26 0.22 0.24 27
3 0.00 0.00 0.00 24
4 0.00 0.00 0.00 19
5 0.00 0.00 0.00 56
6 0.06 0.06 0.06 65
7 0.00 0.00 0.00 22
8 0.00 0.00 0.00 20
9 0.00 0.00 0.00 56
10 0.00 0.00 0.00 24
11 0.05 0.05 0.05 39
12 0.06 0.05 0.05 21
13 0.01 0.01 0.01 89
14 0.57 0.27 0.36 15
15 0.00 0.00 0.00 17
16 0.00 0.00 0.00 13
17 0.15 0.19 0.17 68
18 0.08 0.10 0.08 42
19 0.22 0.26 0.23 272
20 0.03 0.03 0.03 39
accuracy 0.12 1150
macro avg 0.08 0.07 0.07 1150
weighted avg 0.11 0.12 0.11 1150
Accuracy: 0.12
In [41]:
labels_cm_tfIdf_xgb = category_list_tfIdf
cm = confusion_matrix(y_test_encoded, y_pred_xgb_tfIdf)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_xgb); ax.yaxis.set_ticklabels(labels_cm_tfIdf_xgb);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
4. LightGBM:
In [42]:
tfIdf_lgb_model = lgb.LGBMClassifier(random_state=1)
tfIdf_lgb_model.fit(X_train_tfIdf, y_train_tfIdf)
y_pred_lgb_tfIdf = tfIdf_lgb_model.predict(X_test_tfIdf)
print(classification_report(y_test_tfIdf, y_pred_lgb_tfIdf))
accuracy_lgb_tfIdf = accuracy_score(y_test_tfIdf, y_pred_lgb_tfIdf)
print("Accuracy:", accuracy_lgb_tfIdf)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013474 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 149685
[LightGBM] [Info] Number of data points in the train set: 3447, number of used features: 587
[LightGBM] [Info] Start training from score -2.517638
[LightGBM] [Info] Start training from score -2.186835
[LightGBM] [Info] Start training from score -3.750810
[LightGBM] [Info] Start training from score -3.868593
[LightGBM] [Info] Start training from score -4.119908
[LightGBM] [Info] Start training from score -3.021296
[LightGBM] [Info] Start training from score -2.877401
[LightGBM] [Info] Start training from score -3.925752
[LightGBM] [Info] Start training from score -4.034386
[LightGBM] [Info] Start training from score -3.027266
[LightGBM] [Info] Start training from score -3.896764
[LightGBM] [Info] Start training from score -3.391669
[LightGBM] [Info] Start training from score -4.018125
[LightGBM] [Info] Start training from score -2.561763
[LightGBM] [Info] Start training from score -4.316618
[LightGBM] [Info] Start training from score -4.174968
[LightGBM] [Info] Start training from score -4.456380
[LightGBM] [Info] Start training from score -2.822250
[LightGBM] [Info] Start training from score -3.324978
[LightGBM] [Info] Start training from score -1.440845
[LightGBM] [Info] Start training from score -3.383086
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
precision recall f1-score support
Cardiovascular / Pulmonary 0.13 0.11 0.12 93
Consult - History and Phy. 0.14 0.20 0.17 129
Discharge Summary 0.24 0.19 0.21 27
ENT - Otolaryngology 0.00 0.00 0.00 24
Emergency Room Reports 0.00 0.00 0.00 19
Gastroenterology 0.04 0.04 0.04 56
General Medicine 0.04 0.05 0.05 65
Hematology - Oncology 0.00 0.00 0.00 22
Nephrology 0.00 0.00 0.00 20
Neurology 0.02 0.02 0.02 56
Neurosurgery 0.00 0.00 0.00 24
Obstetrics / Gynecology 0.07 0.08 0.08 39
Ophthalmology 0.06 0.05 0.05 21
Orthopedic 0.00 0.00 0.00 89
Pain Management 0.25 0.13 0.17 15
Pediatrics - Neonatal 0.00 0.00 0.00 17
Psychiatry / Psychology 0.00 0.00 0.00 13
Radiology 0.10 0.12 0.11 68
SOAP / Chart / Progress Notes 0.03 0.02 0.02 42
Surgery 0.22 0.26 0.24 272
Urology 0.00 0.00 0.00 39
accuracy 0.11 1150
macro avg 0.06 0.06 0.06 1150
weighted avg 0.10 0.11 0.11 1150
Accuracy: 0.11478260869565217
In [43]:
labels_cm_tfIdf_lgbm = category_list_tfIdf
cm = confusion_matrix(y_test_tfIdf, y_pred_lgb_tfIdf)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_lgbm); ax.yaxis.set_ticklabels(labels_cm_tfIdf_lgbm);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
5. Multinomial NB:
In [44]:
X_train_nb_tfIdf, X_test_nb_tfIdf, y_train_nb_tfIdf, y_test_nb_tfIdf = train_test_split(tfIdfMat.toarray(), labels_tfIdfMat, stratify=labels_tfIdfMat, random_state=1)
tfIdf_nb_model = MultinomialNB()
tfIdf_nb_model.fit(X_train_nb_tfIdf, y_train_nb_tfIdf)
y_pred_nb_tfIdf = tfIdf_nb_model.predict(X_test_nb_tfIdf)
print(classification_report(y_test_nb_tfIdf, y_pred_nb_tfIdf, zero_division=1))
accuracy_nb_tfIdf = accuracy_score(y_test_nb_tfIdf, y_pred_nb_tfIdf)
print("Accuracy:", accuracy_nb_tfIdf)
precision recall f1-score support
Cardiovascular / Pulmonary 0.42 0.26 0.32 93
Consult - History and Phy. 0.30 0.87 0.44 129
Discharge Summary 0.40 0.30 0.34 27
ENT - Otolaryngology 1.00 0.00 0.00 24
Emergency Room Reports 1.00 0.00 0.00 19
Gastroenterology 0.30 0.05 0.09 56
General Medicine 0.33 0.06 0.10 65
Hematology - Oncology 1.00 0.00 0.00 22
Nephrology 1.00 0.00 0.00 20
Neurology 0.37 0.18 0.24 56
Neurosurgery 1.00 0.00 0.00 24
Obstetrics / Gynecology 0.11 0.03 0.04 39
Ophthalmology 0.56 0.24 0.33 21
Orthopedic 0.31 0.12 0.18 89
Pain Management 1.00 0.07 0.12 15
Pediatrics - Neonatal 1.00 0.00 0.00 17
Psychiatry / Psychology 1.00 0.00 0.00 13
Radiology 0.33 0.38 0.36 68
SOAP / Chart / Progress Notes 0.38 0.12 0.18 42
Surgery 0.44 0.81 0.57 272
Urology 0.50 0.05 0.09 39
accuracy 0.38 1150
macro avg 0.61 0.17 0.16 1150
weighted avg 0.46 0.38 0.29 1150
Accuracy: 0.37565217391304345
In [45]:
labels_cm_tfIdf_nb = category_list_tfIdf
cm = confusion_matrix(y_test_nb_tfIdf, y_pred_nb_tfIdf)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_nb); ax.yaxis.set_ticklabels(labels_cm_tfIdf_nb);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
With BOW:
In [46]:
X_train_bow, X_test_bow, y_train_bow, y_test_bow = train_test_split(bow_reduced, labels_bow, stratify=labels_bow,random_state=1)
print('Train_Set_Size:'+str(X_train_bow.shape))
print('Test_Set_Size:'+str(X_test_bow.shape))
Train_Set_Size:(3447, 470) Test_Set_Size:(1150, 470)
Ensemble Learning
In [47]:
def create_cnn_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_lstm_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_gru_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
# Ensemble modeli oluşturma
def create_ensemble_bow(input_shape, num_classes):
cnn = create_cnn_bow(input_shape, num_classes)
lstm = create_lstm_bow(input_shape, num_classes)
gru = create_gru_bow(input_shape, num_classes)
concatenated = concatenate([cnn.output, lstm.output, gru.output])
x = Dense(100, activation='relu')(concatenated)
output = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[cnn.input, lstm.input, gru.input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
In [48]:
input_shape = X_train_bow.shape[1]
num_classes = len(category_list_bow)
# Creating model
model_ensemble_bow = create_ensemble_bow(input_shape, num_classes)
model_ensemble_bow.summary()
# LabelEncoder:
label_encoder = LabelEncoder()
y_train_encoded_ensemble_bow = label_encoder.fit_transform(y_train_bow)
y_test_encoded_ensemble_bow = label_encoder.transform(y_test_bow)
# Train model
history_ensemble_bow = model_ensemble_bow.fit([X_train_bow, X_train_bow, X_train_bow], y_train_encoded_ensemble_bow, epochs=1, batch_size=32)
# Predict
y_pred_ensemble_bow = model_ensemble_bow.predict([X_test_bow, X_test_bow, X_test_bow])
y_pred_ensemble_bow = np.argmax(y_pred_ensemble_bow, axis=1)
print(classification_report(y_test_encoded_ensemble_bow, y_pred_ensemble_bow, target_names=category_list_bow, zero_division=1))
accuracy_ensemble_bow = accuracy_score(y_test_encoded_ensemble_bow, y_pred_ensemble_bow)
print("Accuracy:", accuracy_ensemble_bow)
Model: "model_7"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_4 (InputLayer) [(None, 470)] 0 []
dense_2 (Dense) (None, 64) 30144 ['input_4[0][0]']
input_5 (InputLayer) [(None, 470)] 0 []
input_6 (InputLayer) [(None, 470)] 0 []
dense_3 (Dense) (None, 64) 4160 ['dense_2[0][0]']
dense_5 (Dense) (None, 64) 30144 ['input_5[0][0]']
dense_7 (Dense) (None, 64) 30144 ['input_6[0][0]']
dense_4 (Dense) (None, 64) 4160 ['dense_3[0][0]']
dense_6 (Dense) (None, 64) 4160 ['dense_5[0][0]']
dense_8 (Dense) (None, 64) 4160 ['dense_7[0][0]']
concatenate_1 (Concatenate (None, 192) 0 ['dense_4[0][0]',
) 'dense_6[0][0]',
'dense_8[0][0]']
dense_9 (Dense) (None, 100) 19300 ['concatenate_1[0][0]']
dense_10 (Dense) (None, 21) 2121 ['dense_9[0][0]']
==================================================================================================
Total params: 128493 (501.93 KB)
Trainable params: 128493 (501.93 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
108/108 [==============================] - 1s 2ms/step - loss: 2.5221 - accuracy: 0.3055
36/36 [==============================] - 0s 2ms/step
precision recall f1-score support
Cardiovascular / Pulmonary 0.44 0.12 0.19 93
Neurology 0.26 0.95 0.41 129
Urology 1.00 0.00 0.00 27
General Medicine 1.00 0.00 0.00 24
Surgery 1.00 0.00 0.00 19
SOAP / Chart / Progress Notes 1.00 0.00 0.00 56
Radiology 1.00 0.00 0.00 65
Psychiatry / Psychology 1.00 0.00 0.00 22
Pediatrics - Neonatal 1.00 0.00 0.00 20
Pain Management 0.43 0.05 0.10 56
Orthopedic 1.00 0.00 0.00 24
Ophthalmology 1.00 0.00 0.00 39
Obstetrics / Gynecology 0.53 0.38 0.44 21
Neurosurgery 0.00 0.00 1.00 89
Nephrology 1.00 0.00 0.00 15
Hematology - Oncology 1.00 0.00 0.00 17
Gastroenterology 1.00 0.00 0.00 13
ENT - Otolaryngology 0.39 0.56 0.46 68
Emergency Room Reports 1.00 0.00 0.00 42
Discharge Summary 0.45 0.90 0.60 272
Consult - History and Phy. 1.00 0.00 0.00 39
accuracy 0.37 1150
macro avg 0.79 0.14 0.15 1150
weighted avg 0.59 0.37 0.32 1150
Accuracy: 0.37130434782608696
1. LogisticRegression
In [49]:
bow_lr_model = LogisticRegression(penalty= 'l1', solver= 'liblinear', random_state=1).fit(X_train_bow, y_train_bow)
y_pred_lr_bow= bow_lr_model.predict(X_test_bow)
print(classification_report(y_test_bow, y_pred_lr_bow, labels=category_list_bow, zero_division=1))
accuracy_lr_bow = accuracy_score(y_test_bow, y_pred_lr_bow)
print("Accuracy:", accuracy_lr_bow)
precision recall f1-score support
Cardiovascular / Pulmonary 0.24 0.22 0.23 93
Neurology 0.30 0.29 0.29 56
Urology 0.28 0.23 0.25 39
General Medicine 0.15 0.11 0.12 65
Surgery 0.37 0.52 0.43 272
SOAP / Chart / Progress Notes 0.20 0.29 0.24 42
Radiology 0.21 0.24 0.22 68
Psychiatry / Psychology 0.00 0.00 1.00 13
Pediatrics - Neonatal 0.00 0.00 1.00 17
Pain Management 0.57 0.53 0.55 15
Orthopedic 0.13 0.07 0.09 89
Ophthalmology 0.11 0.05 0.07 21
Obstetrics / Gynecology 0.04 0.03 0.03 39
Neurosurgery 0.00 0.00 1.00 24
Nephrology 0.00 0.00 1.00 20
Hematology - Oncology 0.09 0.05 0.06 22
Gastroenterology 0.21 0.12 0.16 56
ENT - Otolaryngology 0.17 0.04 0.07 24
Emergency Room Reports 0.15 0.11 0.12 19
Discharge Summary 0.36 0.44 0.40 27
Consult - History and Phy. 0.27 0.41 0.32 129
accuracy 0.27 1150
macro avg 0.18 0.18 0.36 1150
weighted avg 0.24 0.27 0.31 1150
Accuracy: 0.27217391304347827
In [50]:
labels_cm_bow_lg = category_list_bow
cm = confusion_matrix(y_test_bow, y_pred_lr_bow)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g'); #annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_lg); ax.yaxis.set_ticklabels(labels_cm_bow_lg);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
2. Random Forest
In [51]:
bow_rf_model = RandomForestClassifier(random_state=1)
bow_rf_model.fit(X_train_bow, y_train_bow)
y_pred_rf_bow = bow_rf_model.predict(X_test_bow)
print(classification_report(y_test_bow, y_pred_rf_bow))
accuracy_rf_bow = accuracy_score(y_test_bow, y_pred_rf_bow)
print("Accuracy:", accuracy_rf_bow)
precision recall f1-score support
Cardiovascular / Pulmonary 0.04 0.03 0.04 93
Consult - History and Phy. 0.14 0.19 0.16 129
Discharge Summary 0.22 0.15 0.18 27
ENT - Otolaryngology 0.00 0.00 0.00 24
Emergency Room Reports 0.00 0.00 0.00 19
Gastroenterology 0.00 0.00 0.00 56
General Medicine 0.07 0.08 0.08 65
Hematology - Oncology 0.00 0.00 0.00 22
Nephrology 0.00 0.00 0.00 20
Neurology 0.02 0.02 0.02 56
Neurosurgery 0.00 0.00 0.00 24
Obstetrics / Gynecology 0.03 0.03 0.03 39
Ophthalmology 0.00 0.00 0.00 21
Orthopedic 0.00 0.00 0.00 89
Pain Management 0.25 0.13 0.17 15
Pediatrics - Neonatal 0.00 0.00 0.00 17
Psychiatry / Psychology 0.00 0.00 0.00 13
Radiology 0.09 0.09 0.09 68
SOAP / Chart / Progress Notes 0.07 0.07 0.07 42
Surgery 0.21 0.28 0.24 272
Urology 0.00 0.00 0.00 39
accuracy 0.11 1150
macro avg 0.05 0.05 0.05 1150
weighted avg 0.09 0.11 0.10 1150
Accuracy: 0.10782608695652174
In [52]:
labels_cm_bow_rf = category_list_bow
cm = confusion_matrix(y_test_bow, y_pred_rf_bow)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_rf); ax.yaxis.set_ticklabels(labels_cm_bow_rf);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
3. XGBoost
In [53]:
from sklearn.preprocessing import LabelEncoder
bow_xgb_model = xgb.XGBClassifier(random_state=1)
label_encoder = LabelEncoder()
# Sınıf etiketlerini sayısal değerlere dönüştürme
y_train_encoded = label_encoder.fit_transform(y_train_bow)
bow_xgb_model.fit(X_train_bow, y_train_encoded)
y_test_encoded = label_encoder.transform(y_test_bow)
y_pred_xgb_bow = bow_xgb_model.predict(X_test_bow)
print(classification_report(y_test_encoded, y_pred_xgb_bow))
accuracy_xgb_bow = accuracy_score(y_test_encoded, y_pred_xgb_bow)
print("Accuracy:", accuracy_xgb_bow)
precision recall f1-score support
0 0.03 0.02 0.02 93
1 0.15 0.22 0.18 129
2 0.26 0.22 0.24 27
3 0.00 0.00 0.00 24
4 0.00 0.00 0.00 19
5 0.02 0.02 0.02 56
6 0.07 0.06 0.07 65
7 0.00 0.00 0.00 22
8 0.00 0.00 0.00 20
9 0.02 0.02 0.02 56
10 0.00 0.00 0.00 24
11 0.05 0.05 0.05 39
12 0.00 0.00 0.00 21
13 0.00 0.00 0.00 89
14 0.50 0.27 0.35 15
15 0.00 0.00 0.00 17
16 0.00 0.00 0.00 13
17 0.11 0.12 0.11 68
18 0.04 0.05 0.04 42
19 0.22 0.28 0.24 272
20 0.00 0.00 0.00 39
accuracy 0.12 1150
macro avg 0.07 0.06 0.06 1150
weighted avg 0.10 0.12 0.11 1150
Accuracy: 0.11565217391304349
In [54]:
labels_cm_bow_xgb = category_list_bow
cm = confusion_matrix(y_test_encoded, y_pred_xgb_bow)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_xgb); ax.yaxis.set_ticklabels(labels_cm_bow_xgb);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
4. LightGBM:
In [55]:
bow_lgb_model = lgb.LGBMClassifier(random_state=1)
bow_lgb_model.fit(X_train_bow, y_train_bow)
y_pred_lgb_bow = bow_lgb_model.predict(X_test_bow)
print(classification_report(y_test_bow, y_pred_lgb_bow))
accuracy_lgb_bow = accuracy_score(y_test_bow, y_pred_lgb_bow)
print("Accuracy:", accuracy_lgb_bow)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 119850
[LightGBM] [Info] Number of data points in the train set: 3447, number of used features: 470
[LightGBM] [Info] Start training from score -2.517638
[LightGBM] [Info] Start training from score -2.186835
[LightGBM] [Info] Start training from score -3.750810
[LightGBM] [Info] Start training from score -3.868593
[LightGBM] [Info] Start training from score -4.119908
[LightGBM] [Info] Start training from score -3.021296
[LightGBM] [Info] Start training from score -2.877401
[LightGBM] [Info] Start training from score -3.925752
[LightGBM] [Info] Start training from score -4.034386
[LightGBM] [Info] Start training from score -3.027266
[LightGBM] [Info] Start training from score -3.896764
[LightGBM] [Info] Start training from score -3.391669
[LightGBM] [Info] Start training from score -4.018125
[LightGBM] [Info] Start training from score -2.561763
[LightGBM] [Info] Start training from score -4.316618
[LightGBM] [Info] Start training from score -4.174968
[LightGBM] [Info] Start training from score -4.456380
[LightGBM] [Info] Start training from score -2.822250
[LightGBM] [Info] Start training from score -3.324978
[LightGBM] [Info] Start training from score -1.440845
[LightGBM] [Info] Start training from score -3.383086
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
precision recall f1-score support
Cardiovascular / Pulmonary 0.07 0.06 0.07 93
Consult - History and Phy. 0.15 0.21 0.18 129
Discharge Summary 0.22 0.15 0.18 27
ENT - Otolaryngology 0.00 0.00 0.00 24
Emergency Room Reports 0.00 0.00 0.00 19
Gastroenterology 0.00 0.00 0.00 56
General Medicine 0.05 0.05 0.05 65
Hematology - Oncology 0.00 0.00 0.00 22
Nephrology 0.00 0.00 0.00 20
Neurology 0.04 0.04 0.04 56
Neurosurgery 0.00 0.00 0.00 24
Obstetrics / Gynecology 0.08 0.08 0.08 39
Ophthalmology 0.06 0.05 0.05 21
Orthopedic 0.02 0.01 0.01 89
Pain Management 0.33 0.20 0.25 15
Pediatrics - Neonatal 0.00 0.00 0.00 17
Psychiatry / Psychology 0.00 0.00 0.00 13
Radiology 0.09 0.10 0.10 68
SOAP / Chart / Progress Notes 0.04 0.05 0.05 42
Surgery 0.22 0.27 0.24 272
Urology 0.00 0.00 0.00 39
accuracy 0.11 1150
macro avg 0.07 0.06 0.06 1150
weighted avg 0.10 0.11 0.11 1150
Accuracy: 0.11478260869565217
In [56]:
labels_cm_bow_lgbm = category_list_bow
cm = confusion_matrix(y_test_bow, y_pred_lgb_bow)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_lgbm); ax.yaxis.set_ticklabels(labels_cm_bow_lgbm);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
5. Multinomial NB:
In [57]:
X_train_nb_bow, X_test_nb_bow, y_train_nb_bow, y_test_nb_bow = train_test_split(bow_matrix.toarray(), labels_bow, stratify=labels_bow, random_state=1)
bow_nb_model = MultinomialNB()
bow_nb_model.fit(X_train_nb_bow, y_train_nb_bow)
y_pred_nb_bow = bow_nb_model.predict(X_test_nb_bow)
print(classification_report(y_test_nb_bow, y_pred_nb_bow, zero_division=1))
accuracy_nb_bow = accuracy_score(y_test_nb_bow, y_pred_nb_bow)
print("Accuracy:", accuracy_nb_bow)
precision recall f1-score support
Cardiovascular / Pulmonary 0.35 0.25 0.29 93
Consult - History and Phy. 0.32 0.54 0.40 129
Discharge Summary 0.46 0.63 0.53 27
ENT - Otolaryngology 0.54 0.29 0.38 24
Emergency Room Reports 0.13 0.11 0.12 19
Gastroenterology 0.37 0.29 0.32 56
General Medicine 0.13 0.09 0.11 65
Hematology - Oncology 0.00 0.00 1.00 22
Nephrology 0.25 0.05 0.08 20
Neurology 0.46 0.38 0.41 56
Neurosurgery 0.27 0.50 0.35 24
Obstetrics / Gynecology 0.21 0.26 0.23 39
Ophthalmology 0.48 0.67 0.56 21
Orthopedic 0.39 0.25 0.30 89
Pain Management 0.43 0.80 0.56 15
Pediatrics - Neonatal 0.00 0.00 1.00 17
Psychiatry / Psychology 0.14 0.08 0.10 13
Radiology 0.35 0.44 0.39 68
SOAP / Chart / Progress Notes 0.26 0.50 0.34 42
Surgery 0.43 0.35 0.39 272
Urology 0.22 0.18 0.20 39
accuracy 0.34 1150
macro avg 0.29 0.32 0.38 1150
weighted avg 0.34 0.34 0.36 1150
Accuracy: 0.3373913043478261
In [58]:
labels_cm_bow_nb = category_list_bow
cm = confusion_matrix(y_test_nb_bow, y_pred_nb_bow)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_nb); ax.yaxis.set_ticklabels(labels_cm_bow_nb);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
Second Phase (Second Paragraph)
Feature Extraction
TF-IDF:
In [59]:
tfidfVectorizer_2 = TfidfVectorizer(analyzer='word', stop_words='english', ngram_range=(1,3), max_df=0.75, use_idf=True, smooth_idf=True, max_features=1000)
tfIdfMat_2 = tfidfVectorizer_2.fit_transform(ner_model_df['ner_named_entity'].tolist())
tfidf_feature_names_2 = sorted(tfidfVectorizer_2.get_feature_names_out())
print(tfidf_feature_names_2)
['000', '000 epinephrine', '10', '10 mcg', '10 mg', '10 units', '10 units subcutaneous', '100', '100 000', '100 000 epinephrine', '100 mg', '1000', '1000 mg', '125', '125 mg', '141', '145', '145 mg', '150', '150 mg', '1500', '1500 twice', '20', '20 meq', '20 mg', '20 ml', '200', '200 000', '200 000 epinephrine', '240', '240 mg', '25', '25 mg', '300', '300 mg', '375', '375 mg', '40', '40 mg', '450', '450 mg', '50', '50 mg', '600', '600 mg', '75', '75 mg', '800', '800 mg', '81', '81 mg', '90', '90 mg', 'abdominal', 'abdominal pain', 'abductovalgus', 'abilify', 'abilify mg', 'abnormalities', 'abscess', 'abscessed', 'abuse', 'aches', 'acidosis', 'adhd', 'agitation', 'albuterol', 'alcohol', 'allergies', 'allergy', 'ama', 'amoxicillin', 'anastomotic', 'anastomotic stricture', 'anorexia', 'antacid', 'anxiety', 'artery', 'artery disease', 'arthritis', 'asa', 'aspirin', 'aspirin 81', 'aspirin 81 mg', 'aspirin tablet', 'asystole', 'atrial', 'atrial fibrillation', 'atrovent', 'av', 'av dissociation', 'axis', 'axis ii', 'b12', 'barium', 'basal', 'basal cell', 'basal cell nevus', 'benadryl', 'benzodiazepines', 'beverage', 'beverage syndrome', 'bicarbonate', 'bipolar', 'bipolar disorder', 'bleeding', 'block', 'blood', 'blood loss', 'bowel', 'bowel obstruction', 'bowel obstruction type', 'brain', 'brain edema', 'breath', 'breath past', 'bronchitis', 'bruxism', 'bunion', 'bunionette', 'calcium', 'cancer', 'cancer metastasis', 'carbonated', 'carbonated beverage', 'carbonated beverage syndrome', 'cardiac', 'cardiac disease', 'cardizem', 'care', 'care thank', 'carious', 'carious teeth', 'cataracts', 'cell', 'cell lung', 'cell lung cancer', 'cell nevus', 'cell nevus syndrome', 'cellulitis', 'cellulitis leg', 'chest', 'chest pain', 'chills', 'chloride', 'chloride 20', 'chloride 20 meq', 'cholesterol', 'chronic', 'chronic obstructive', 'chronic obstructive pulmonary', 'clindamycin', 'clubbing', 'cocaine', 'colace', 'colace 100', 'colace 100 mg', 'compazine', 'complaints', 'complaints shortness', 'complaints shortness breath', 'compression', 'compression fractures', 'confusion', 'consciousness', 'contusion', 'copd', 'coronary', 'coronary artery', 'coronary artery disease', 'cough', 'cranial', 'cranial nerves', 'cranial nerves ii', 'creatinine', 'crohn', 'crohn disease', 'cva', 'cyanosis', 'cymbalta', 'darvocet', 'deformity', 'degenerative', 'degenerative joint', 'degenerative joint disease', 'delusions', 'depakote', 'depakote 1000', 'depakote 1000 mg', 'depo', 'depo medrol', 'depressed', 'depression', 'depression disorder', 'depression disorder axis', 'depressive', 'depressive symptoms', 'dexamethasone', 'dexamethasone mg', 'dexamethasone phosphate', 'diabetes', 'diabetes mellitus', 'diagnosis', 'diagnosis social', 'digoxin', 'digoxin 125', 'digoxin 125 mg', 'dilantin', 'dilantin 300', 'dilantin 300 mg', 'disability', 'disease', 'disease gastritis', 'disorder', 'disorder axis', 'disorder axis ii', 'dissociation', 'distress', 'diverticular', 'diverticular disease', 'dmso', 'docusate', 'docusate 100', 'docusate 100 mg', 'duragesic', 'dvt', 'dysfunction', 'dysfunction postoperative', 'dysphasia', 'dyspnea', 'earache', 'edema', 'elavil', 'elavil 50', 'elavil 50 mg', 'elmiron', 'elmiron 100', 'elmiron 100 mg', 'epinephrine', 'epinephrine ml', 'erectile', 'erectile dysfunction', 'erosion', 'erosions', 'erythema', 'esophageal', 'esophageal reflux', 'esophageal reflux disease', 'esophageal spasm', 'exostosis', 'facet', 'facet joints', 'fatigue', 'fever', 'fevers', 'fibrillation', 'fibrosis', 'flashbacks', 'foot', 'foot procedures', 'fracture', 'fractures', 'gastritis', 'gastro', 'gastro esophageal', 'gastro esophageal reflux', 'gerd', 'glioblastoma', 'glioblastoma multiforme', 'grave', 'grave disability', 'haldol', 'hallucinations', 'hallux', 'hallux abductovalgus', 'hallux interphalangeus', 'haloperidol', 'haloperidol mg', 'headaches', 'heartburn', 'hemi', 'hemi implant', 'hemiparesis', 'hemoptysis', 'hemorrhage', 'heparin', 'hepatitis', 'hepatosplenomegaly', 'hernia', 'hiatal', 'hiatal hernia', 'hip', 'hip joints', 'hodgkin', 'hodgkin lymphoma', 'hole', 'hole titanium', 'hyperactivity', 'hypertension', 'hypertrophic', 'hypertrophic exostosis', 'hypertrophic synovium', 'hyperventilation', 'ibuprofen', 'ibuprofen 600', 'ibuprofen 600 mg', 'ideations', 'ii', 'ii block', 'implant', 'impulsivity', 'imuran', 'imuran 100', 'imuran 100 mg', 'indurated', 'infection', 'infections', 'inflammation', 'inpatient', 'inpatient psychiatric', 'inpatient psychiatric care', 'int', 'intercostal', 'intercostal block', 'intermittent', 'intermittent obstructive', 'intermittent obstructive symptoms', 'interphalangeus', 'interphalangeus deformity', 'intracranial', 'intracranial abnormalities', 'iron', 'iron 240', 'iron 240 mg', 'joint', 'joint disease', 'joint syndrome', 'joints', 'keflex', 'kenalog', 'keppra', 'keppra 1500', 'keppra 1500 twice', 'klonopin', 'klonopin mg', 'lasix', 'lasix 20', 'lasix 20 mg', 'lateral', 'lateral exostosis', 'left', 'left mandibular', 'left mandibular vestibular', 'leg', 'leg pain', 'lexapro', 'lexapro 10', 'lexapro 10 mg', 'lidocaine', 'lithium', 'lithium 450', 'lithium 450 mg', 'liver', 'liver disease', 'lopressor', 'lopressor 75', 'lopressor 75 mg', 'lortab', 'losartan', 'losartan 50', 'losartan 50 mg', 'loss', 'loss consciousness', 'lovenox', 'lung', 'lung cancer', 'lung cancer metastasis', 'lupron', 'lymphadenopathy', 'lymphoma', 'malignant', 'malignant meningioma', 'mall', 'mall occlusion', 'malocclusion', 'mandibular', 'mandibular vestibular', 'mandibular vestibular abscess', 'mania', 'manic', 'marcaine', 'marcaine 25', 'masticatory', 'masticatory dysfunction', 'masticatory dysfunction postoperative', 'mcg', 'medrol', 'mellitus', 'meningioma', 'meq', 'metastasis', 'mg', 'migraine', 'migraine headaches', 'migraine type', 'migraine type headaches', 'migraines', 'ml', 'mobitz', 'mobitz type', 'mobitz type ii', 'months', 'morphine', 'morphine mg', 'motrin', 'motrin 800', 'motrin 800 mg', 'mouthwash', 'multiforme', 'multivitamin', 'muscle', 'muscle weakness', 'myelopathy', 'naprosyn', 'naprosyn 375', 'naprosyn 375 mg', 'nausea', 'negative', 'negative past', 'nerve', 'nerve block', 'nerves', 'nerves ii', 'neurontin', 'neurontin 300', 'neurontin 300 mg', 'neuropathy', 'nevus', 'nevus syndrome', 'nexium', 'nexium 40', 'nexium 40 mg', 'nifedipine', 'nifedipine 10', 'nifedipine 10 mg', 'nifedipine 90', 'nifedipine 90 mg', 'non', 'non small', 'non small cell', 'nph', 'nph 10', 'nph 10 units', 'numbness', 'obesity', 'obstruction', 'obstruction type', 'obstruction type symptoms', 'obstructions', 'obstructive', 'obstructive pulmonary', 'obstructive pulmonary disease', 'obstructive symptoms', 'obtundation', 'occlusion', 'odynophagia', 'omeprazole', 'omeprazole 20', 'omeprazole 20 mg', 'osteoporosis', 'otitis', 'oxycodone', 'oxycontin', 'oxycontin 10', 'oxycontin 10 mg', 'oxygen', 'pain', 'painful', 'painful bunion', 'pains', 'palate', 'panic', 'past', 'paxil', 'paxil 10', 'paxil 10 mg', 'paxil 20', 'paxil 20 mg', 'penicillin', 'pepcid', 'peptic', 'peptic ulcer', 'peptic ulcer disease', 'percocet', 'peripheral', 'peripheral neuropathy', 'phosphate', 'pinprick', 'pinprick spine', 'pins', 'plasmacytoma', 'platelet', 'platelet rich', 'plavix', 'polysubstance', 'polysubstance abuse', 'postoperative', 'potassium', 'potassium chloride', 'prednisone', 'prednisone 20', 'prednisone 20 mg', 'premarin', 'premarin 25', 'premarin 25 mg', 'prilosec', 'primarily', 'primarily pain', 'primary', 'primary psychotic', 'primary psychotic symptoms', 'procedures', 'prostate', 'prostate cancer', 'psychiatric', 'psychiatric care', 'psychiatric care thank', 'psychomotor', 'psychomotor hyperactivity', 'psychotic', 'psychotic symptoms', 'pulmonary', 'pulmonary disease', 'pulmonary fibrosis', 'pylori', 'recurrent', 'recurrent bowel', 'recurrent bowel obstruction', 'reflux', 'reflux disease', 'reflux type', 'respiratory', 'respiratory acidosis', 'respiratory distress', 'respiratory infections', 'restless', 'rheumatoid', 'rheumatoid arthritis', 'rich', 'right', 'right leg', 'right leg pain', 'sacroiliac', 'sacroiliac joint', 'sacroiliac joint syndrome', 'sacroiliac joints', 'seizures', 'sensations', 'sepsis', 'septic', 'septic shock', 'septicemia', 'seroquel', 'seroquel 1000', 'seroquel 1000 mg', 'shock', 'shortness', 'shortness breath', 'small', 'small cell', 'small cell lung', 'social', 'sodium', 'sodium 141', 'sodium bicarbonate', 'sore', 'sore throats', 'spasm', 'spine', 'spiriva', 'spiriva 10', 'spiriva 10 mcg', 'spondylosis', 'standby', 'statin', 'stepfather', 'stricture', 'strokes', 'subcutaneous', 'sweats', 'swelling', 'symptoms', 'syndrome', 'synovium', 'tablet', 'tca', 'teeth', 'temodar', 'temodar 100', 'temodar 100 mg', 'tenderness', 'thank', 'thoracic', 'thoracic compression', 'thoracic compression fractures', 'throat', 'throats', 'thrombocytopenia', 'thrombophlebitis', 'titanium', 'tooth', 'topamax', 'topamax 100', 'topamax 100 mg', 'toprol', 'trauma', 'tricor', 'tricor 145', 'tricor 145 mg', 'tumor', 'twice', 'tylenol', 'type', 'type headaches', 'type ii', 'type ii block', 'type symptoms', 'ulcer', 'ulcer disease', 'ulcer disease gastritis', 'units', 'units subcutaneous', 'varix', 'vestibular', 'vestibular abscess', 'vicodin', 'vitamin', 'vitamin b12', 'vomiting', 'vp', 'warfarin', 'warfarin mg', 'weakness', 'weight', 'weight loss', 'wellbutrin', 'wellbutrin 300', 'wellbutrin 300 mg', 'xanax', 'xanax 25', 'xanax 25 mg', 'xanax mg', 'xylocaine', 'xylocaine 20', 'xylocaine 20 ml', 'zantac', 'zantac 150', 'zantac 150 mg', 'zestril', 'zofran', 'zofran mg', 'zoloft', 'zoloft 100', 'zoloft 100 mg']
Bag-of-Words (CountVectorizer):
In [61]:
# CountVectorizer (Bag-of-Words)
countVectorizer_2 = CountVectorizer(analyzer='word', stop_words='english', ngram_range=(1, 3), max_df=0.75, max_features=1000)
bow_matrix_2 = countVectorizer_2.fit_transform(ner_model_df['ner_named_entity'].tolist())
bow_feature_names_2 = sorted(countVectorizer_2.get_feature_names_out())
print(bow_feature_names_2)
['000', '000 epinephrine', '10', '10 mcg', '10 mg', '10 units', '10 units subcutaneous', '100', '100 000', '100 000 epinephrine', '100 mg', '1000', '1000 mg', '125', '125 mg', '141', '145', '145 mg', '150', '150 mg', '1500', '1500 twice', '20', '20 meq', '20 mg', '20 ml', '200', '200 000', '200 000 epinephrine', '240', '240 mg', '25', '25 mg', '300', '300 mg', '375', '375 mg', '40', '40 mg', '450', '450 mg', '50', '50 mg', '600', '600 mg', '75', '75 mg', '800', '800 mg', '81', '81 mg', '90', '90 mg', 'abdominal', 'abdominal pain', 'abductovalgus', 'abilify', 'abilify mg', 'abnormalities', 'abscess', 'abscessed', 'abuse', 'aches', 'acidosis', 'adhd', 'agitation', 'albuterol', 'alcohol', 'allergies', 'allergy', 'ama', 'amoxicillin', 'anastomotic', 'anastomotic stricture', 'anorexia', 'antacid', 'anxiety', 'artery', 'artery disease', 'arthritis', 'asa', 'aspirin', 'aspirin 81', 'aspirin 81 mg', 'aspirin tablet', 'asystole', 'atrial', 'atrial fibrillation', 'atrovent', 'av', 'av dissociation', 'axis', 'axis ii', 'b12', 'barium', 'basal', 'basal cell', 'basal cell nevus', 'benadryl', 'benzodiazepines', 'beverage', 'beverage syndrome', 'bicarbonate', 'bipolar', 'bipolar disorder', 'bleeding', 'block', 'blood', 'blood loss', 'bowel', 'bowel obstruction', 'bowel obstruction type', 'brain', 'brain edema', 'breath', 'breath past', 'bronchitis', 'bruxism', 'bunion', 'bunionette', 'calcium', 'cancer', 'cancer metastasis', 'carbonated', 'carbonated beverage', 'carbonated beverage syndrome', 'cardiac', 'cardiac disease', 'cardizem', 'care', 'care thank', 'carious', 'carious teeth', 'cataracts', 'cell', 'cell lung', 'cell lung cancer', 'cell nevus', 'cell nevus syndrome', 'cellulitis', 'cellulitis leg', 'chest', 'chest pain', 'chills', 'chloride', 'chloride 20', 'chloride 20 meq', 'cholesterol', 'chronic', 'chronic obstructive', 'chronic obstructive pulmonary', 'clindamycin', 'clubbing', 'cocaine', 'colace', 'colace 100', 'colace 100 mg', 'compazine', 'complaints', 'complaints shortness', 'complaints shortness breath', 'compression', 'compression fractures', 'confusion', 'consciousness', 'contusion', 'copd', 'coronary', 'coronary artery', 'coronary artery disease', 'cough', 'cranial', 'cranial nerves', 'cranial nerves ii', 'creatinine', 'crohn', 'crohn disease', 'cva', 'cyanosis', 'cymbalta', 'darvocet', 'deformity', 'degenerative', 'degenerative joint', 'degenerative joint disease', 'delusions', 'depakote', 'depakote 1000', 'depakote 1000 mg', 'depo', 'depo medrol', 'depressed', 'depression', 'depression disorder', 'depression disorder axis', 'depressive', 'depressive symptoms', 'dexamethasone', 'dexamethasone mg', 'dexamethasone phosphate', 'diabetes', 'diabetes mellitus', 'diagnosis', 'diagnosis social', 'digoxin', 'digoxin 125', 'digoxin 125 mg', 'dilantin', 'dilantin 300', 'dilantin 300 mg', 'disability', 'disease', 'disease gastritis', 'disorder', 'disorder axis', 'disorder axis ii', 'dissociation', 'distress', 'diverticular', 'diverticular disease', 'dmso', 'docusate', 'docusate 100', 'docusate 100 mg', 'duragesic', 'dvt', 'dysfunction', 'dysfunction postoperative', 'dysphasia', 'dyspnea', 'earache', 'edema', 'elavil', 'elavil 50', 'elavil 50 mg', 'elmiron', 'elmiron 100', 'elmiron 100 mg', 'epinephrine', 'epinephrine ml', 'erectile', 'erectile dysfunction', 'erosion', 'erosions', 'erythema', 'esophageal', 'esophageal reflux', 'esophageal reflux disease', 'esophageal spasm', 'exostosis', 'facet', 'facet joints', 'fatigue', 'fever', 'fevers', 'fibrillation', 'fibrosis', 'flashbacks', 'foot', 'foot procedures', 'fracture', 'fractures', 'gastritis', 'gastro', 'gastro esophageal', 'gastro esophageal reflux', 'gerd', 'glioblastoma', 'glioblastoma multiforme', 'grave', 'grave disability', 'haldol', 'hallucinations', 'hallux', 'hallux abductovalgus', 'hallux interphalangeus', 'haloperidol', 'haloperidol mg', 'headaches', 'heartburn', 'hemi', 'hemi implant', 'hemiparesis', 'hemoptysis', 'hemorrhage', 'heparin', 'hepatitis', 'hepatosplenomegaly', 'hernia', 'hiatal', 'hiatal hernia', 'hip', 'hip joints', 'hodgkin', 'hodgkin lymphoma', 'hole', 'hole titanium', 'hyperactivity', 'hypertension', 'hypertrophic', 'hypertrophic exostosis', 'hypertrophic synovium', 'hyperventilation', 'ibuprofen', 'ibuprofen 600', 'ibuprofen 600 mg', 'ideations', 'ii', 'ii block', 'implant', 'impulsivity', 'imuran', 'imuran 100', 'imuran 100 mg', 'indurated', 'infection', 'infections', 'inflammation', 'inpatient', 'inpatient psychiatric', 'inpatient psychiatric care', 'int', 'intercostal', 'intercostal block', 'intermittent', 'intermittent obstructive', 'intermittent obstructive symptoms', 'interphalangeus', 'interphalangeus deformity', 'intracranial', 'intracranial abnormalities', 'iron', 'iron 240', 'iron 240 mg', 'joint', 'joint disease', 'joint syndrome', 'joints', 'keflex', 'kenalog', 'keppra', 'keppra 1500', 'keppra 1500 twice', 'klonopin', 'klonopin mg', 'lasix', 'lasix 20', 'lasix 20 mg', 'lateral', 'lateral exostosis', 'left', 'left mandibular', 'left mandibular vestibular', 'leg', 'leg pain', 'lexapro', 'lexapro 10', 'lexapro 10 mg', 'lidocaine', 'lithium', 'lithium 450', 'lithium 450 mg', 'liver', 'liver disease', 'lopressor', 'lopressor 75', 'lopressor 75 mg', 'lortab', 'losartan', 'losartan 50', 'losartan 50 mg', 'loss', 'loss consciousness', 'lovenox', 'lung', 'lung cancer', 'lung cancer metastasis', 'lupron', 'lymphadenopathy', 'lymphoma', 'malignant', 'malignant meningioma', 'mall', 'mall occlusion', 'malocclusion', 'mandibular', 'mandibular vestibular', 'mandibular vestibular abscess', 'mania', 'manic', 'marcaine', 'marcaine 25', 'masticatory', 'masticatory dysfunction', 'masticatory dysfunction postoperative', 'mcg', 'medrol', 'mellitus', 'meningioma', 'meq', 'metastasis', 'mg', 'migraine', 'migraine headaches', 'migraine type', 'migraine type headaches', 'migraines', 'ml', 'mobitz', 'mobitz type', 'mobitz type ii', 'months', 'morphine', 'morphine mg', 'motrin', 'motrin 800', 'motrin 800 mg', 'mouthwash', 'multiforme', 'multivitamin', 'muscle', 'muscle weakness', 'myelopathy', 'naprosyn', 'naprosyn 375', 'naprosyn 375 mg', 'nausea', 'negative', 'negative past', 'nerve', 'nerve block', 'nerves', 'nerves ii', 'neurontin', 'neurontin 300', 'neurontin 300 mg', 'neuropathy', 'nevus', 'nevus syndrome', 'nexium', 'nexium 40', 'nexium 40 mg', 'nifedipine', 'nifedipine 10', 'nifedipine 10 mg', 'nifedipine 90', 'nifedipine 90 mg', 'non', 'non small', 'non small cell', 'nph', 'nph 10', 'nph 10 units', 'numbness', 'obesity', 'obstruction', 'obstruction type', 'obstruction type symptoms', 'obstructions', 'obstructive', 'obstructive pulmonary', 'obstructive pulmonary disease', 'obstructive symptoms', 'obtundation', 'occlusion', 'odynophagia', 'omeprazole', 'omeprazole 20', 'omeprazole 20 mg', 'osteoporosis', 'otitis', 'oxycodone', 'oxycontin', 'oxycontin 10', 'oxycontin 10 mg', 'oxygen', 'pain', 'painful', 'painful bunion', 'pains', 'palate', 'panic', 'past', 'paxil', 'paxil 10', 'paxil 10 mg', 'paxil 20', 'paxil 20 mg', 'penicillin', 'pepcid', 'peptic', 'peptic ulcer', 'peptic ulcer disease', 'percocet', 'peripheral', 'peripheral neuropathy', 'phosphate', 'pinprick', 'pinprick spine', 'pins', 'plasmacytoma', 'platelet', 'platelet rich', 'plavix', 'polysubstance', 'polysubstance abuse', 'postoperative', 'potassium', 'potassium chloride', 'prednisone', 'prednisone 20', 'prednisone 20 mg', 'premarin', 'premarin 25', 'premarin 25 mg', 'prilosec', 'primarily', 'primarily pain', 'primary', 'primary psychotic', 'primary psychotic symptoms', 'procedures', 'prostate', 'prostate cancer', 'psychiatric', 'psychiatric care', 'psychiatric care thank', 'psychomotor', 'psychomotor hyperactivity', 'psychotic', 'psychotic symptoms', 'pulmonary', 'pulmonary disease', 'pulmonary fibrosis', 'pylori', 'recurrent', 'recurrent bowel', 'recurrent bowel obstruction', 'reflux', 'reflux disease', 'reflux type', 'respiratory', 'respiratory acidosis', 'respiratory distress', 'respiratory infections', 'restless', 'rheumatoid', 'rheumatoid arthritis', 'rich', 'right', 'right leg', 'right leg pain', 'sacroiliac', 'sacroiliac joint', 'sacroiliac joint syndrome', 'sacroiliac joints', 'seizures', 'sensations', 'sepsis', 'septic', 'septic shock', 'septicemia', 'seroquel', 'seroquel 1000', 'seroquel 1000 mg', 'shock', 'shortness', 'shortness breath', 'small', 'small cell', 'small cell lung', 'social', 'sodium', 'sodium 141', 'sodium bicarbonate', 'sore', 'sore throats', 'spasm', 'spine', 'spiriva', 'spiriva 10', 'spiriva 10 mcg', 'spondylosis', 'standby', 'statin', 'stepfather', 'stricture', 'strokes', 'subcutaneous', 'sweats', 'swelling', 'symptoms', 'syndrome', 'synovium', 'tablet', 'tca', 'teeth', 'temodar', 'temodar 100', 'temodar 100 mg', 'tenderness', 'thank', 'thoracic', 'thoracic compression', 'thoracic compression fractures', 'throat', 'throats', 'thrombocytopenia', 'thrombophlebitis', 'titanium', 'tooth', 'topamax', 'topamax 100', 'topamax 100 mg', 'toprol', 'trauma', 'tricor', 'tricor 145', 'tricor 145 mg', 'tumor', 'twice', 'tylenol', 'type', 'type headaches', 'type ii', 'type ii block', 'type symptoms', 'ulcer', 'ulcer disease', 'ulcer disease gastritis', 'units', 'units subcutaneous', 'varix', 'vestibular', 'vestibular abscess', 'vicodin', 'vitamin', 'vitamin b12', 'vomiting', 'vp', 'warfarin', 'warfarin mg', 'weakness', 'weight', 'weight loss', 'wellbutrin', 'wellbutrin 300', 'wellbutrin 300 mg', 'xanax', 'xanax 25', 'xanax 25 mg', 'xanax mg', 'xylocaine', 'xylocaine 20', 'xylocaine 20 ml', 'zantac', 'zantac 150', 'zantac 150 mg', 'zestril', 'zofran', 'zofran mg', 'zoloft', 'zoloft 100', 'zoloft 100 mg']
PCA (Principal Component Analysis)
In [81]:
gc.collect()
pca = PCA(n_components=0.95)
For TF-IDF:
In [128]:
tfIdfMat_reduced_2 = pca.fit_transform(tfIdfMat_2.toarray())
labels_tfIdfMat_2 = ner_model_df['ner_category_label'].tolist()
category_list_tfIdf_2 = ner_model_df['ner_category_label'].unique()
For BOW:
In [129]:
bow_reduced_2 = pca.fit_transform(bow_matrix_2.toarray())
labels_bow_2 = ner_model_df['ner_category_label'].tolist()
category_list_bow_2 = ner_model_df['ner_category_label'].unique()
Implement Algorithms & Architecture
With TF-IDF:
In [92]:
X_train_tfIdf_2, X_test_tfIdf_2, y_train_tfIdf_2, y_test_tfIdf_2 = train_test_split(tfIdfMat_reduced_2, labels_tfIdfMat_2, stratify=labels_tfIdfMat_2, random_state=1)
print('Train_Set_Size:'+str(X_train_tfIdf_2.shape))
print('Test_Set_Size:'+str(X_test_tfIdf_2.shape))
Train_Set_Size:(1511, 249) Test_Set_Size:(504, 249)
Ensemble Learning
In [93]:
def create_cnn_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_lstm_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = LSTM(50, return_sequences=True)(x)
x = LSTM(50)(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_gru_tfIdf(input_shape, vocab_size, embedding_dim):
input_layer = Input(shape=input_shape)
x = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
x = GRU(50, return_sequences=True)(x)
x = GRU(50)(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_ensemble_tfIdf(input_shape, vocab_size, embedding_dim, num_classes):
cnn = create_cnn_tfIdf(input_shape, vocab_size, embedding_dim)
lstm = create_lstm_tfIdf(input_shape, vocab_size, embedding_dim)
gru = create_gru_tfIdf(input_shape, vocab_size, embedding_dim)
concatenated = concatenate([cnn.output, lstm.output, gru.output])
x = Dense(100, activation='relu')(concatenated)
output = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[cnn.input, lstm.input, gru.input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
In [94]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
input_shape = X_train_tfIdf_2.shape[1]
num_classes = len(category_list_tfIdf_2)
vocab_size = len(category_list_tfIdf_2)
embedding_dim = 50
# Creating model
model_ensemble_tfIdf_2 = create_ensemble_tfIdf(input_shape, vocab_size, embedding_dim, num_classes)
model_ensemble_tfIdf_2.summary()
# LabelEncoder:
label_encoder = LabelEncoder()
y_train_encoded_ensemble_tfIdf_2 = label_encoder.fit_transform(y_train_tfIdf_2)
y_test_encoded_ensemble_tfIdf_2 = label_encoder.transform(y_test_tfIdf_2)
# Train model
history_ensemble_tfIdf_2 = model_ensemble_tfIdf_2.fit([X_train_tfIdf_2, X_train_tfIdf_2, X_train_tfIdf_2], y_train_encoded_ensemble_tfIdf_2, epochs=1, batch_size=32)
# Predict
y_pred_ensemble_tfIdf_2 = model_ensemble_tfIdf_2.predict([X_test_tfIdf_2, X_test_tfIdf_2, X_test_tfIdf_2])
y_pred_ensemble_tfIdf_2 = np.argmax(y_pred_ensemble_tfIdf_2, axis=1)
print(classification_report(y_test_encoded_ensemble_tfIdf_2, y_pred_ensemble_tfIdf_2, target_names=category_list_tfIdf_2, zero_division=1))
accuracy_ensemble_tfIdf_2 = accuracy_score(y_test_encoded_ensemble_tfIdf_2, y_pred_ensemble_tfIdf_2)
print("Accuracy:", accuracy_ensemble_tfIdf_2)
Model: "model_15"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_10 (InputLayer) [(None, 249)] 0 []
embedding_6 (Embedding) (None, 249, 50) 150 ['input_10[0][0]']
input_11 (InputLayer) [(None, 249)] 0 []
input_12 (InputLayer) [(None, 249)] 0 []
conv1d_4 (Conv1D) (None, 247, 64) 9664 ['embedding_6[0][0]']
embedding_7 (Embedding) (None, 249, 50) 150 ['input_11[0][0]']
embedding_8 (Embedding) (None, 249, 50) 150 ['input_12[0][0]']
conv1d_5 (Conv1D) (None, 245, 64) 12352 ['conv1d_4[0][0]']
lstm_4 (LSTM) (None, 249, 50) 20200 ['embedding_7[0][0]']
gru_4 (GRU) (None, 249, 50) 15300 ['embedding_8[0][0]']
global_max_pooling1d_2 (Gl (None, 64) 0 ['conv1d_5[0][0]']
obalMaxPooling1D)
lstm_5 (LSTM) (None, 50) 20200 ['lstm_4[0][0]']
gru_5 (GRU) (None, 50) 15300 ['gru_4[0][0]']
concatenate_3 (Concatenate (None, 164) 0 ['global_max_pooling1d_2[0][0]
) ',
'lstm_5[0][0]',
'gru_5[0][0]']
dense_13 (Dense) (None, 100) 16500 ['concatenate_3[0][0]']
dense_14 (Dense) (None, 3) 303 ['dense_13[0][0]']
==================================================================================================
Total params: 110269 (430.74 KB)
Trainable params: 110269 (430.74 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
48/48 [==============================] - 10s 113ms/step - loss: 0.8391 - accuracy: 0.6161
16/16 [==============================] - 2s 42ms/step
precision recall f1-score support
DRUG_DOSE 1.00 0.00 0.00 169
DISEASE 0.63 1.00 0.77 316
CHEMICAL 1.00 0.00 0.00 19
accuracy 0.63 504
macro avg 0.88 0.33 0.26 504
weighted avg 0.77 0.63 0.48 504
Accuracy: 0.626984126984127
1. LogisticRegression
In [99]:
tfIdf_lr_model_2 = LogisticRegression(penalty= 'elasticnet', solver= 'saga', l1_ratio=0.5, random_state=1).fit(X_train_tfIdf_2, y_train_tfIdf_2)
y_pred_lr_tfIdf_2 = tfIdf_lr_model_2.predict(X_test_tfIdf_2)
print(classification_report(y_test_tfIdf_2, y_pred_lr_tfIdf_2, labels=category_list_tfIdf_2, zero_division=1))
accuracy_lr_tfIdf_2 = accuracy_score(y_test_tfIdf_2, y_pred_lr_tfIdf_2)
print("Accuracy:", accuracy_lr_tfIdf_2)
precision recall f1-score support
DRUG_DOSE 1.00 0.37 0.54 19
DISEASE 0.96 1.00 0.98 316
CHEMICAL 0.94 0.93 0.93 169
accuracy 0.95 504
macro avg 0.97 0.77 0.82 504
weighted avg 0.95 0.95 0.95 504
Accuracy: 0.9523809523809523
In [100]:
labels_cm_tfIdf_lg_2 = category_list_tfIdf_2
cm = confusion_matrix(y_test_tfIdf_2, y_pred_lr_tfIdf_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g'); #annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_lg_2); ax.yaxis.set_ticklabels(labels_cm_tfIdf_lg_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
2. RandomForest:
In [101]:
tfIdf_rf_model_2 = RandomForestClassifier(random_state=1)
tfIdf_rf_model_2.fit(X_train_tfIdf_2, y_train_tfIdf_2)
y_pred_rf_tfIdf_2 = tfIdf_rf_model_2.predict(X_test_tfIdf_2)
print(classification_report(y_test_tfIdf_2, y_pred_rf_tfIdf_2))
accuracy_rf_tfIdf_2 = accuracy_score(y_test_tfIdf_2, y_pred_rf_tfIdf_2)
print("Accuracy:", accuracy_rf_tfIdf_2)
precision recall f1-score support
CHEMICAL 0.97 0.97 0.97 169
DISEASE 0.97 1.00 0.99 316
DRUG_DOSE 1.00 0.53 0.69 19
accuracy 0.97 504
macro avg 0.98 0.83 0.88 504
weighted avg 0.97 0.97 0.97 504
Accuracy: 0.9722222222222222
In [102]:
labels_cm_tfIdf_rf_2 = category_list_tfIdf_2
cm = confusion_matrix(y_test_tfIdf_2, y_pred_rf_tfIdf_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_rf_2); ax.yaxis.set_ticklabels(labels_cm_tfIdf_rf_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
3. XGBoost:
In [103]:
from sklearn.preprocessing import LabelEncoder
tfIdf_xgb_model_2 = xgb.XGBClassifier(random_state=1)
label_encoder = LabelEncoder()
# Sınıf etiketlerini sayısal değerlere dönüştürme
y_train_encoded_2 = label_encoder.fit_transform(y_train_tfIdf_2)
tfIdf_xgb_model_2.fit(X_train_tfIdf_2, y_train_encoded_2)
y_test_encoded_2 = label_encoder.transform(y_test_tfIdf_2)
y_pred_xgb_tfIdf_2 = tfIdf_xgb_model_2.predict(X_test_tfIdf_2)
print(classification_report(y_test_encoded_2, y_pred_xgb_tfIdf_2))
accuracy_xgb_tfIdf_2 = accuracy_score(y_test_encoded_2, y_pred_xgb_tfIdf_2)
print("Accuracy:", accuracy_xgb_tfIdf_2)
precision recall f1-score support
0 0.96 0.97 0.96 169
1 0.98 1.00 0.99 316
2 1.00 0.63 0.77 19
accuracy 0.97 504
macro avg 0.98 0.87 0.91 504
weighted avg 0.97 0.97 0.97 504
Accuracy: 0.9742063492063492
In [104]:
labels_cm_tfIdf_xgb_2 = category_list_tfIdf_2
cm = confusion_matrix(y_test_encoded_2, y_pred_xgb_tfIdf_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_xgb_2); ax.yaxis.set_ticklabels(labels_cm_tfIdf_xgb_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
4. LightGBM:
In [106]:
tfIdf_lgb_model_2 = lgb.LGBMClassifier(random_state=1)
tfIdf_lgb_model_2.fit(X_train_tfIdf_2, y_train_tfIdf_2)
y_pred_lgb_tfIdf_2 = tfIdf_lgb_model_2.predict(X_test_tfIdf_2)
print(classification_report(y_test_tfIdf_2, y_pred_lgb_tfIdf_2))
accuracy_lgb_tfIdf_2 = accuracy_score(y_test_tfIdf_2, y_pred_lgb_tfIdf_2)
print("Accuracy:", accuracy_lgb_tfIdf_2)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004513 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63193
[LightGBM] [Info] Number of data points in the train set: 1511, number of used features: 249
[LightGBM] [Info] Start training from score -1.095969
[LightGBM] [Info] Start training from score -0.465118
[LightGBM] [Info] Start training from score -3.277476
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
precision recall f1-score support
CHEMICAL 0.96 0.97 0.96 169
DISEASE 0.98 1.00 0.99 316
DRUG_DOSE 1.00 0.58 0.73 19
accuracy 0.97 504
macro avg 0.98 0.85 0.90 504
weighted avg 0.97 0.97 0.97 504
Accuracy: 0.9742063492063492
In [107]:
labels_cm_tfIdf_lgbm_2 = category_list_tfIdf_2
cm = confusion_matrix(y_test_tfIdf_2, y_pred_lgb_tfIdf_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_lgbm_2); ax.yaxis.set_ticklabels(labels_cm_tfIdf_lgbm_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
5. Multinomial NB:
In [108]:
X_train_nb_tfIdf_2, X_test_nb_tfIdf_2, y_train_nb_tfIdf_2, y_test_nb_tfIdf_2 = train_test_split(tfIdfMat_2.toarray(), labels_tfIdfMat_2, stratify=labels_tfIdfMat_2, random_state=1)
tfIdf_nb_model_2 = MultinomialNB()
tfIdf_nb_model_2.fit(X_train_nb_tfIdf_2, y_train_nb_tfIdf_2)
y_pred_nb_tfIdf_2 = tfIdf_nb_model_2.predict(X_test_nb_tfIdf_2)
print(classification_report(y_test_nb_tfIdf_2, y_pred_nb_tfIdf_2, zero_division=1))
accuracy_nb_tfIdf_2 = accuracy_score(y_test_nb_tfIdf_2, y_pred_nb_tfIdf_2)
print("Accuracy:", accuracy_nb_tfIdf_2)
precision recall f1-score support
CHEMICAL 0.89 0.94 0.91 169
DISEASE 0.97 1.00 0.98 316
DRUG_DOSE 1.00 0.00 0.00 19
accuracy 0.94 504
macro avg 0.95 0.65 0.63 504
weighted avg 0.94 0.94 0.92 504
Accuracy: 0.9404761904761905
In [109]:
labels_cm_tfIdf_nb_2 = category_list_tfIdf_2
cm = confusion_matrix(y_test_nb_tfIdf_2, y_pred_nb_tfIdf_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_tfIdf_nb_2); ax.yaxis.set_ticklabels(labels_cm_tfIdf_nb_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
With BOW:
In [110]:
X_train_bow_2, X_test_bow_2, y_train_bow_2, y_test_bow_2 = train_test_split(bow_reduced_2, labels_bow_2, stratify=labels_bow_2, random_state=1)
print('Train_Set_Size:'+str(X_train_bow_2.shape))
print('Test_Set_Size:'+str(X_test_bow_2.shape))
Train_Set_Size:(1511, 252) Test_Set_Size:(504, 252)
Ensemble Learning
In [111]:
def create_cnn_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_lstm_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
def create_gru_bow(input_shape, num_classes):
input_layer = Input(shape=(input_shape,))
x = Dense(64, activation='relu')(input_layer)
x = Dense(64, activation='relu')(x)
model = Model(inputs=input_layer, outputs=x)
return model
# Ensemble modeli oluşturma
def create_ensemble_bow(input_shape, num_classes):
cnn = create_cnn_bow(input_shape, num_classes)
lstm = create_lstm_bow(input_shape, num_classes)
gru = create_gru_bow(input_shape, num_classes)
concatenated = concatenate([cnn.output, lstm.output, gru.output])
x = Dense(100, activation='relu')(concatenated)
output = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[cnn.input, lstm.input, gru.input], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
In [112]:
input_shape = X_train_bow_2.shape[1]
num_classes = len(category_list_bow_2)
# Creating model
model_ensemble_bow_2 = create_ensemble_bow(input_shape, num_classes)
model_ensemble_bow_2.summary()
# LabelEncoder:
label_encoder = LabelEncoder()
y_train_encoded_ensemble_bow_2 = label_encoder.fit_transform(y_train_bow_2)
y_test_encoded_ensemble_bow_2 = label_encoder.transform(y_test_bow_2)
# Train model
history_ensemble_bow_2 = model_ensemble_bow_2.fit([X_train_bow_2, X_train_bow_2, X_train_bow_2], y_train_encoded_ensemble_bow_2, epochs=1, batch_size=32)
# Predict
y_pred_ensemble_bow_2 = model_ensemble_bow_2.predict([X_test_bow_2, X_test_bow_2, X_test_bow_2])
y_pred_ensemble_bow_2 = np.argmax(y_pred_ensemble_bow_2, axis=1)
print(classification_report(y_test_encoded_ensemble_bow_2, y_pred_ensemble_bow_2, target_names=category_list_bow_2, zero_division=1))
accuracy_ensemble_bow_2 = accuracy_score(y_test_encoded_ensemble_bow_2, y_pred_ensemble_bow_2)
print("Accuracy:", accuracy_ensemble_bow_2)
Model: "model_19"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_13 (InputLayer) [(None, 252)] 0 []
dense_15 (Dense) (None, 64) 16192 ['input_13[0][0]']
input_14 (InputLayer) [(None, 252)] 0 []
input_15 (InputLayer) [(None, 252)] 0 []
dense_16 (Dense) (None, 64) 4160 ['dense_15[0][0]']
dense_18 (Dense) (None, 64) 16192 ['input_14[0][0]']
dense_20 (Dense) (None, 64) 16192 ['input_15[0][0]']
dense_17 (Dense) (None, 64) 4160 ['dense_16[0][0]']
dense_19 (Dense) (None, 64) 4160 ['dense_18[0][0]']
dense_21 (Dense) (None, 64) 4160 ['dense_20[0][0]']
concatenate_4 (Concatenate (None, 192) 0 ['dense_17[0][0]',
) 'dense_19[0][0]',
'dense_21[0][0]']
dense_22 (Dense) (None, 100) 19300 ['concatenate_4[0][0]']
dense_23 (Dense) (None, 3) 303 ['dense_22[0][0]']
==================================================================================================
Total params: 84819 (331.32 KB)
Trainable params: 84819 (331.32 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
48/48 [==============================] - 1s 2ms/step - loss: 0.7544 - accuracy: 0.6803
16/16 [==============================] - 0s 1ms/step
precision recall f1-score support
DRUG_DOSE 0.88 0.91 0.89 169
DISEASE 0.92 0.97 0.94 316
CHEMICAL 1.00 0.00 0.00 19
accuracy 0.91 504
macro avg 0.93 0.62 0.61 504
weighted avg 0.91 0.91 0.89 504
Accuracy: 0.9087301587301587
1. LogisticRegression
In [113]:
bow_lr_model_2 = LogisticRegression(penalty= 'l1', solver= 'liblinear', random_state=1).fit(X_train_bow_2, y_train_bow_2)
y_pred_lr_bow_2 = bow_lr_model_2.predict(X_test_bow_2)
print(classification_report(y_test_bow_2, y_pred_lr_bow_2, labels=category_list_bow_2, zero_division=1))
accuracy_lr_bow_2 = accuracy_score(y_test_bow_2, y_pred_lr_bow_2)
print("Accuracy:", accuracy_lr_bow_2)
precision recall f1-score support
DRUG_DOSE 1.00 0.58 0.73 19
DISEASE 0.94 0.99 0.97 316
CHEMICAL 0.95 0.90 0.92 169
accuracy 0.95 504
macro avg 0.96 0.82 0.87 504
weighted avg 0.95 0.95 0.94 504
Accuracy: 0.9464285714285714
In [114]:
labels_cm_bow_lg_2 = category_list_bow_2
cm = confusion_matrix(y_test_bow_2, y_pred_lr_bow_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g'); #annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_lg_2); ax.yaxis.set_ticklabels(labels_cm_bow_lg_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
2. Random Forest
In [115]:
bow_rf_model_2 = RandomForestClassifier(random_state=1)
bow_rf_model_2.fit(X_train_bow_2, y_train_bow_2)
y_pred_rf_bow_2 = bow_rf_model_2.predict(X_test_bow_2)
print(classification_report(y_test_bow_2, y_pred_rf_bow_2))
accuracy_rf_bow_2 = accuracy_score(y_test_bow_2, y_pred_rf_bow_2)
print("Accuracy:", accuracy_rf_bow_2)
precision recall f1-score support
CHEMICAL 0.94 0.98 0.96 169
DISEASE 0.99 0.99 0.99 316
DRUG_DOSE 1.00 0.53 0.69 19
accuracy 0.97 504
macro avg 0.98 0.83 0.88 504
weighted avg 0.97 0.97 0.97 504
Accuracy: 0.9722222222222222
In [116]:
labels_cm_bow_rf_2 = category_list_bow_2
cm = confusion_matrix(y_test_bow_2, y_pred_rf_bow_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_rf_2); ax.yaxis.set_ticklabels(labels_cm_bow_rf_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
3. XGBoost
In [153]:
from sklearn.preprocessing import LabelEncoder
bow_xgb_model_2 = xgb.XGBClassifier(random_state=1)
label_encoder = LabelEncoder()
# Sınıf etiketlerini sayısal değerlere dönüştürme
y_train_encoded_2 = label_encoder.fit_transform(y_train_bow_2)
bow_xgb_model_2.fit(X_train_bow_2, y_train_encoded_2)
y_test_encoded_bow_2 = label_encoder.transform(y_test_bow_2)
y_pred_xgb_bow_2 = bow_xgb_model_2.predict(X_test_bow_2)
print(classification_report(y_test_encoded_bow_2, y_pred_xgb_bow_2))
accuracy_xgb_bow_2 = accuracy_score(y_test_encoded_bow_2, y_pred_xgb_bow_2)
print("Accuracy:", accuracy_xgb_bow_2)
precision recall f1-score support
0 0.96 0.98 0.97 169
1 0.99 1.00 1.00 316
2 1.00 0.63 0.77 19
accuracy 0.98 504
macro avg 0.98 0.87 0.91 504
weighted avg 0.98 0.98 0.98 504
Accuracy: 0.9801587301587301
In [154]:
labels_cm_bow_xgb_2 = category_list_bow_2
cm = confusion_matrix(y_test_encoded_bow_2, y_pred_xgb_bow_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_xgb_2); ax.yaxis.set_ticklabels(labels_cm_bow_xgb_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
4. LightGBM:
In [119]:
bow_lgb_model_2 = lgb.LGBMClassifier(random_state=1)
bow_lgb_model_2.fit(X_train_bow_2, y_train_bow_2)
y_pred_lgb_bow_2 = bow_lgb_model_2.predict(X_test_bow_2)
print(classification_report(y_test_bow_2, y_pred_lgb_bow_2))
accuracy_lgb_bow_2 = accuracy_score(y_test_bow_2, y_pred_lgb_bow_2)
print("Accuracy:", accuracy_lgb_bow_2)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003863 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64034
[LightGBM] [Info] Number of data points in the train set: 1511, number of used features: 252
[LightGBM] [Info] Start training from score -1.095969
[LightGBM] [Info] Start training from score -0.465118
[LightGBM] [Info] Start training from score -3.277476
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
precision recall f1-score support
CHEMICAL 0.96 0.96 0.96 169
DISEASE 0.99 1.00 1.00 316
DRUG_DOSE 0.80 0.63 0.71 19
accuracy 0.97 504
macro avg 0.92 0.87 0.89 504
weighted avg 0.97 0.97 0.97 504
Accuracy: 0.9742063492063492
In [120]:
labels_cm_bow_lgbm_2 = category_list_bow_2
cm = confusion_matrix(y_test_bow_2, y_pred_lgb_bow_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_lgbm_2); ax.yaxis.set_ticklabels(labels_cm_bow_lgbm_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
5. Multinomial NB:
In [121]:
X_train_nb_bow_2, X_test_nb_bow_2, y_train_nb_bow_2, y_test_nb_bow_2 = train_test_split(bow_matrix_2.toarray(), labels_bow_2, stratify=labels_bow_2, random_state=1)
bow_nb_model_2 = MultinomialNB()
bow_nb_model_2.fit(X_train_nb_bow_2, y_train_nb_bow_2)
y_pred_nb_bow_2 = bow_nb_model_2.predict(X_test_nb_bow_2)
print(classification_report(y_test_nb_bow_2, y_pred_nb_bow_2, zero_division=1))
accuracy_nb_bow_2 = accuracy_score(y_test_nb_bow_2, y_pred_nb_bow_2)
print("Accuracy:", accuracy_nb_bow_2)
precision recall f1-score support
CHEMICAL 0.88 0.96 0.92 169
DISEASE 0.98 0.97 0.97 316
DRUG_DOSE 0.83 0.26 0.40 19
accuracy 0.94 504
macro avg 0.90 0.73 0.76 504
weighted avg 0.94 0.94 0.93 504
Accuracy: 0.9384920634920635
In [122]:
labels_cm_bow_nb_2 = category_list_bow_2
cm = confusion_matrix(y_test_nb_bow_2, y_pred_nb_bow_2)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_bow_nb_2); ax.yaxis.set_ticklabels(labels_cm_bow_nb_2);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
Phase-1 and Phase-2 Comparison
Phase-1
In [136]:
from IPython.display import display
# With TF-IDF
model_names = ['Ensemble Learning Model', 'LogisticRegression', 'Random Forest', 'XGBoost', 'LightGBM', 'Multinomial NB']
accuracy_values_tfIdf = [(accuracy_ensemble_tfIdf * 100), (accuracy_lr_tfIdf * 100),
(accuracy_rf_tfIdf * 100), (accuracy_xgb_tfIdf * 100),
(accuracy_lgb_tfIdf * 100), (accuracy_nb_tfIdf * 100)]
comparison_table_tfIdf = pd.DataFrame({
'Model': model_names,
'Accuracy': accuracy_values_tfIdf
})
# With Bow
accuracy_values_bow = [(accuracy_ensemble_bow * 100), (accuracy_lr_bow * 100),
(accuracy_rf_bow * 100), (accuracy_xgb_bow * 100),
(accuracy_lgb_bow * 100), (accuracy_nb_bow * 100)]
comparison_table_bow = pd.DataFrame({
'Model': model_names,
'Accuracy': accuracy_values_bow
})
comparison_table_phase_1 = pd.merge(comparison_table_tfIdf, comparison_table_bow, on='Model', suffixes=(' TF-IDF', ' BoW'))
display(comparison_table_phase_1)
| Model | Accuracy TF-IDF | Accuracy BoW | |
|---|---|---|---|
| 0 | Ensemble Learning Model | 23.652174 | 37.130435 |
| 1 | LogisticRegression | 38.173913 | 27.217391 |
| 2 | Random Forest | 10.956522 | 10.782609 |
| 3 | XGBoost | 12.000000 | 11.565217 |
| 4 | LightGBM | 11.478261 | 11.478261 |
| 5 | Multinomial NB | 37.565217 | 33.739130 |
Phase-2
In [137]:
# With TF-IDF
model_names = ['Ensemble Learning Model', 'LogisticRegression', 'Random Forest', 'XGBoost', 'LightGBM', 'Multinomial NB']
accuracy_values_tfIdf_2 = [(accuracy_ensemble_tfIdf_2 * 100), (accuracy_lr_tfIdf_2 * 100),
(accuracy_rf_tfIdf_2 * 100), (accuracy_xgb_tfIdf_2 * 100),
(accuracy_lgb_tfIdf_2 * 100), (accuracy_nb_tfIdf_2 * 100)]
comparison_table_tfIdf_2 = pd.DataFrame({
'Model': model_names,
'Accuracy': accuracy_values_tfIdf_2
})
# With BOW
accuracy_values_bow_2 = [(accuracy_ensemble_bow_2 * 100), (accuracy_lr_bow_2 * 100),
(accuracy_rf_bow_2 * 100), (accuracy_xgb_bow_2 * 100),
(accuracy_lgb_bow_2 * 100), (accuracy_nb_bow_2 * 100)]
comparison_table_bow_2 = pd.DataFrame({
'Model': model_names,
'Accuracy': accuracy_values_bow_2
})
comparison_table_phase_2 = pd.merge(comparison_table_tfIdf_2, comparison_table_bow_2, on='Model', suffixes=(' TF-IDF', ' BoW'))
display(comparison_table_combined_2)
| Model | Accuracy TF-IDF | Accuracy BoW | |
|---|---|---|---|
| 0 | Ensemble Learning Model | 62.698413 | 90.873016 |
| 1 | LogisticRegression | 95.238095 | 94.642857 |
| 2 | Random Forest | 97.222222 | 97.222222 |
| 3 | XGBoost | 97.420635 | 98.015873 |
| 4 | LightGBM | 97.420635 | 97.420635 |
| 5 | Multinomial NB | 94.047619 | 93.849206 |
In [138]:
comparison_table_combined = pd.merge(comparison_table_combined, comparison_table_combined_2, on='Model', suffixes=(' Phase 1', ' Phase 2'))
display(comparison_table_combined)
| Model | Accuracy TF-IDF Phase 1 | Accuracy BoW Phase 1 | Accuracy TF-IDF Phase 2 | Accuracy BoW Phase 2 | |
|---|---|---|---|---|---|
| 0 | Ensemble Learning Model | 23.652174 | 37.130435 | 62.698413 | 90.873016 |
| 1 | LogisticRegression | 38.173913 | 27.217391 | 95.238095 | 94.642857 |
| 2 | Random Forest | 10.956522 | 10.782609 | 97.222222 | 97.222222 |
| 3 | XGBoost | 12.000000 | 11.565217 | 97.420635 | 98.015873 |
| 4 | LightGBM | 11.478261 | 11.478261 | 97.420635 | 97.420635 |
| 5 | Multinomial NB | 37.565217 | 33.739130 | 94.047619 | 93.849206 |
In [143]:
# Finding the best model in Phase 1
best_model_phase1 = comparison_table_combined_final.loc[comparison_table_combined_final['Accuracy TF-IDF Phase 1'].idxmax(), 'Model']
best_accuracy_phase1 = comparison_table_combined_final['Accuracy TF-IDF Phase 1'].max()
best_model_phase1_bow = comparison_table_combined_final.loc[comparison_table_combined_final['Accuracy BoW Phase 1'].idxmax(), 'Model']
best_accuracy_phase1_bow = comparison_table_combined_final['Accuracy BoW Phase 1'].max()
# Finding the best model in Phase 2
best_model_phase2 = comparison_table_combined_final.loc[comparison_table_combined_final['Accuracy TF-IDF Phase 2'].idxmax(), 'Model']
best_accuracy_phase2 = comparison_table_combined_final['Accuracy TF-IDF Phase 2'].max()
best_model_phase2_bow = comparison_table_combined_final.loc[comparison_table_combined_final['Accuracy BoW Phase 2'].idxmax(), 'Model']
best_accuracy_phase2_bow = comparison_table_combined_final['Accuracy BoW Phase 2'].max()
# Printing the model with the highest accuracy and its phase information
if best_accuracy_phase1 > best_accuracy_phase1_bow and best_accuracy_phase1 > best_accuracy_phase2 and best_accuracy_phase1 > best_accuracy_phase2_bow:
print(f"The model with the highest TF-IDF accuracy: {best_model_phase1}, Phase: 1, Accuracy: {best_accuracy_phase1}%")
elif best_accuracy_phase1_bow > best_accuracy_phase1 and best_accuracy_phase1_bow > best_accuracy_phase2 and best_accuracy_phase1_bow > best_accuracy_phase2_bow:
print(f"The model with the highest BoW accuracy: {best_model_phase1_bow}, Phase: 1, Accuracy: {best_accuracy_phase1_bow}%")
elif best_accuracy_phase2 > best_accuracy_phase1 and best_accuracy_phase2 > best_accuracy_phase1_bow and best_accuracy_phase2 > best_accuracy_phase2_bow:
print(f"The model with the highest TF-IDF accuracy: {best_model_phase2}, Phase: 2, Accuracy: {best_accuracy_phase2}%")
else:
print(f"The model with the highest BoW accuracy: {best_model_phase2_bow}, Phase: 2, Accuracy: {best_accuracy_phase2_bow}%")
The model with the highest BoW accuracy: XGBoost, Phase: 2, Accuracy: 98.01587301587301%
Phase 3 (Third Paragraph)
In [171]:
smote_over_sample = SMOTE(sampling_strategy='minority')
labels_smote = ner_model_df['ner_category_label'].tolist()
X, y = smote_over_sample.fit_resample(bow_reduced_2, labels_smote)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,random_state=1)
print('Train_Set_Size:'+str(X_train.shape))
print('Test_Set_Size:'+str(X_test.shape))
Train_Set_Size:(2403, 252) Test_Set_Size:(801, 252)
In [172]:
from sklearn.preprocessing import LabelEncoder
# Convert labels to numeric values
label_encoder = LabelEncoder()
y_train_encoded_smote = label_encoder.fit_transform(y_train)
y_test_encoded_smote = label_encoder.transform(y_test)
# Train
xgb_model_smote = xgb.XGBClassifier()
xgb_model_smote.fit(X_train, y_train_encoded_smote)
# Predict
y_pred_smote = xgb_model.predict(X_test)
accuracy_xgb_smote = accuracy_score(y_test_encoded_smote, y_pred_smote)
print("Accuracy:", accuracy_xgb_smote)
Accuracy: 0.9850187265917603
In [173]:
print(classification_report(y_test_encoded_smote, y_pred_smote, zero_division=1))
precision recall f1-score support
0 0.98 0.95 0.96 169
1 0.99 1.00 1.00 316
2 0.98 0.99 0.99 316
accuracy 0.99 801
macro avg 0.98 0.98 0.98 801
weighted avg 0.98 0.99 0.98 801
In [176]:
labels_cm_smote = category_list_bow_2
cm = confusion_matrix(y_test_encoded_smote, y_pred_smote)
fig = plt.figure(figsize=(20,20))
ax= fig.add_subplot(1,1,1)
sns.heatmap(cm, annot=True, cmap="Greens",ax = ax,fmt='g');
# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
ax.xaxis.set_ticklabels(labels_cm_smote); ax.yaxis.set_ticklabels(labels_cm_smote);
plt.setp(ax.get_yticklabels(), rotation=30, horizontalalignment='right')
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
plt.show()
Comparison for Phase 3
In [175]:
models_names_final = ['BoW XGBoost Phase 2', 'Bow XGBoost with SMOTE']
accuracy_values_final = [(accuracy_xgb_bow_2 * 100), (accuracy_xgb_smote * 100)]
comparison_table_final = pd.DataFrame({
'Model': models_names_final,
'Accuracy': accuracy_values_final
})
display(comparison_table_final)
| Model | Accuracy | |
|---|---|---|
| 0 | BoW XGBoost Phase 2 | 98.015873 |
| 1 | Bow XGBoost with SMOTE | 98.501873 |
In [179]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
precision_xgb_bow_2 = precision_score(y_test_encoded_bow_2, y_pred_xgb_bow_2, average='weighted')
recall_xgb_bow_2= recall_score(y_test_encoded_bow_2, y_pred_xgb_bow_2, average='weighted')
f1_xgb_bow_2 = f1_score(y_test_encoded_bow_2, y_pred_xgb_bow_2, average='weighted')
precision_xgb_smote = precision_score(y_test_encoded_smote, y_pred_smote, average='weighted')
recall_xgb_smote = recall_score(y_test_encoded_smote, y_pred_smote, average='weighted')
f1_xgb_smote = f1_score(y_test_encoded_smote, y_pred_smote, average='weighted')
metrics_1 = {
"Accuracy": accuracy_xgb_bow_2 * 100,
"Precision": precision_xgb_bow_2 * 100,
"Recall": recall_xgb_bow_2 * 100,
"F1 Score": f1_xgb_bow_2 * 100
}
metrics_2 = {
"Accuracy": accuracy_xgb_smote * 100,
"Precision": precision_xgb_smote * 100,
"Recall": recall_xgb_smote * 100,
"F1 Score": f1_xgb_smote * 100
}
df_result_table = pd.DataFrame([metrics_1, metrics_2],
index=['BoW XGBoost Phase 2', 'BoW XGBoost with SMOTE'])
display(df_result_table)
| Accuracy | Precision | Recall | F1 Score | |
|---|---|---|---|---|
| BoW XGBoost Phase 2 | 98.015873 | 98.053584 | 98.015873 | 97.872072 |
| BoW XGBoost with SMOTE | 98.501873 | 98.498653 | 98.501873 | 98.491874 |
In [ ]: